Browse Source

nxm: inclusion of the ape subsystem and changes to build it

With this, ape builds and its tools run properly
in NxM. I've added two tools required to build
ape: util/^(cpp/ pcc.c), both taken from
sys/src/cmd.

Modifications:
util/cpp - almost no changes from sys/src/cmd/cpp;
	just function renaming and mods to the mkfile.
util/pcc.c - several changes to support $NXM root:
	-r$path sets root path; all exec paths are made
	relative instead of absolute.
util/6l/obj.c - changed to make its final search path
	be $NXM/amd64/lib, instead of /amd64/lib...
amd64/include/ape, sys/include/ape - ... so that
	we can make all the ape #pragma lib declarations
	relative paths instead of absolute.
sys/src/ape - only changed mkfiles and config
sys/src/mkfile - now we build the ape subsystem
util/BUILD - we also build cpp and pcc to support ape
.gitignore - ignore some ape boogers

BUGS:
Ape build not yet tested from within NxM. It should not
require anything more than appropriate changes to
sys/src/ape/config file, however.

Change-Id: I54c5eae82e8c6849b056e366624b56a3d32a3c03
Signed-off-by: Akshat Kumar <seed@mail.nanosouffle.net>
Reviewed-on: http://nxm.coreboot.org/review/86
Tested-by: Jenkins QA
Reviewed-by: ron minnich <rminnich@gmail.com>
Akshat Kumar 12 years ago
parent
commit
04c3801efd
100 changed files with 27396 additions and 50 deletions
  1. 5 0
      .gitignore
  2. 1 1
      amd64/include/ape/math.h
  3. 2 2
      amd64/mkfile
  4. 1 1
      sys/include/ape/Plan9libnet.h
  5. 1 1
      sys/include/ape/assert.h
  6. 1 1
      sys/include/ape/bsd.h
  7. 1 1
      sys/include/ape/ctype.h
  8. 1 1
      sys/include/ape/cursor.h
  9. 1 1
      sys/include/ape/dirent.h
  10. 1 1
      sys/include/ape/draw.h
  11. 1 1
      sys/include/ape/errno.h
  12. 1 1
      sys/include/ape/error.h
  13. 1 1
      sys/include/ape/event.h
  14. 1 1
      sys/include/ape/fcntl.h
  15. 1 1
      sys/include/ape/fmt.h
  16. 1 1
      sys/include/ape/grp.h
  17. 1 1
      sys/include/ape/lib9.h
  18. 1 1
      sys/include/ape/libl.h
  19. 1 1
      sys/include/ape/libnet.h
  20. 1 1
      sys/include/ape/libv.h
  21. 1 1
      sys/include/ape/locale.h
  22. 1 1
      sys/include/ape/lock.h
  23. 1 1
      sys/include/ape/netdb.h
  24. 1 1
      sys/include/ape/pwd.h
  25. 1 1
      sys/include/ape/qlock.h
  26. 1 1
      sys/include/ape/regexp.h
  27. 1 1
      sys/include/ape/select.h
  28. 1 1
      sys/include/ape/setjmp.h
  29. 1 1
      sys/include/ape/signal.h
  30. 2 2
      sys/include/ape/stdio.h
  31. 1 1
      sys/include/ape/stdlib.h
  32. 1 1
      sys/include/ape/string.h
  33. 1 1
      sys/include/ape/sys/pty.h
  34. 1 1
      sys/include/ape/sys/select.h
  35. 1 1
      sys/include/ape/sys/socket.h
  36. 2 2
      sys/include/ape/sys/stat.h
  37. 1 1
      sys/include/ape/sys/time.h
  38. 1 1
      sys/include/ape/sys/times.h
  39. 1 1
      sys/include/ape/sys/types.h
  40. 1 1
      sys/include/ape/sys/uio.h
  41. 1 1
      sys/include/ape/sys/utsname.h
  42. 1 1
      sys/include/ape/sys/wait.h
  43. 1 1
      sys/include/ape/termios.h
  44. 1 1
      sys/include/ape/time.h
  45. 1 1
      sys/include/ape/unistd.h
  46. 2 2
      sys/include/ape/utf.h
  47. 1 1
      sys/include/ape/utime.h
  48. 10 0
      sys/src/ape/9src/mkfile
  49. 266 0
      sys/src/ape/9src/stty.c
  50. 127 0
      sys/src/ape/9src/tar.c
  51. 127 0
      sys/src/ape/9src/tty.h
  52. 189 0
      sys/src/ape/cmd/README
  53. 45 0
      sys/src/ape/cmd/basename.c
  54. 387 0
      sys/src/ape/cmd/cc.c
  55. 339 0
      sys/src/ape/cmd/diff/COPYING
  56. 1766 0
      sys/src/ape/cmd/diff/ChangeLog
  57. 24 0
      sys/src/ape/cmd/diff/FREEBSD-upgrade
  58. 126 0
      sys/src/ape/cmd/diff/NEWS
  59. 9 0
      sys/src/ape/cmd/diff/README
  60. 1084 0
      sys/src/ape/cmd/diff/analyze.c
  61. 40 0
      sys/src/ape/cmd/diff/cmpbuf.c
  62. 20 0
      sys/src/ape/cmd/diff/cmpbuf.h
  63. 118 0
      sys/src/ape/cmd/diff/config.h
  64. 468 0
      sys/src/ape/cmd/diff/context.c
  65. 71 0
      sys/src/ape/cmd/diff/diagmeet.note
  66. 1124 0
      sys/src/ape/cmd/diff/diff.c
  67. 344 0
      sys/src/ape/cmd/diff/diff.h
  68. 3916 0
      sys/src/ape/cmd/diff/diff.texi
  69. 1778 0
      sys/src/ape/cmd/diff/diff3.c
  70. 216 0
      sys/src/ape/cmd/diff/dir.c
  71. 200 0
      sys/src/ape/cmd/diff/ed.c
  72. 181 0
      sys/src/ape/cmd/diff/fnmatch.c
  73. 40 0
      sys/src/ape/cmd/diff/fnmatch.h
  74. 748 0
      sys/src/ape/cmd/diff/getopt.c
  75. 129 0
      sys/src/ape/cmd/diff/getopt.h
  76. 180 0
      sys/src/ape/cmd/diff/getopt1.c
  77. 428 0
      sys/src/ape/cmd/diff/ifdef.c
  78. 238 0
      sys/src/ape/cmd/diff/install-sh
  79. 714 0
      sys/src/ape/cmd/diff/io.c
  80. 50 0
      sys/src/ape/cmd/diff/mkfile
  81. 71 0
      sys/src/ape/cmd/diff/normal.c
  82. 87 0
      sys/src/ape/cmd/diff/prepend_args.c
  83. 21 0
      sys/src/ape/cmd/diff/prepend_args.h
  84. 6374 0
      sys/src/ape/cmd/diff/regex.c
  85. 510 0
      sys/src/ape/cmd/diff/regex.h
  86. 1109 0
      sys/src/ape/cmd/diff/sdiff.c
  87. 284 0
      sys/src/ape/cmd/diff/side.c
  88. 273 0
      sys/src/ape/cmd/diff/system.h
  89. 759 0
      sys/src/ape/cmd/diff/util.c
  90. 5 0
      sys/src/ape/cmd/diff/version.c
  91. 81 0
      sys/src/ape/cmd/diff/xmalloc.c
  92. 38 0
      sys/src/ape/cmd/dirname.c
  93. 295 0
      sys/src/ape/cmd/expr/expr.y
  94. 14 0
      sys/src/ape/cmd/expr/mkfile
  95. 410 0
      sys/src/ape/cmd/expr/regexp.h
  96. 75 0
      sys/src/ape/cmd/kill.c
  97. 213 0
      sys/src/ape/cmd/make/defs.h
  98. 380 0
      sys/src/ape/cmd/make/doname.c
  99. 288 0
      sys/src/ape/cmd/make/dosys.c
  100. 552 0
      sys/src/ape/cmd/make/files.c

+ 5 - 0
.gitignore

@@ -15,10 +15,12 @@ xbin/6a
 xbin/6c
 xbin/6l
 xbin/ar
+xbin/cpp
 xbin/data2s
 xbin/mksys
 xbin/mklatinkbd
 xbin/nm
+xbin/pcc
 6.out
 6.*
 o.out
@@ -87,6 +89,9 @@ sys/src/cmd/rc/x.tab.h
 sys/src/cmd/upas/smtp/rfc822.tab.c
 sys/src/cmd/upas/smtp/smtpd.tab.c
 sys/src/libscribble/syms
+sys/src/ape/lib/ap/syscall/*.s
+sys/src/ape/lib/ap/syscall/sys.h
+sys/src/ape/cmd/make/gram.c
 
 #random?
 Users/

+ 1 - 1
amd64/include/ape/math.h

@@ -1,6 +1,6 @@
 #ifndef __MATH
 #define __MATH
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 /* a HUGE_VAL appropriate for IEEE double-precision */
 /* the correct value, 1.797693134862316e+308, causes a ken overflow */

+ 2 - 2
amd64/mkfile

@@ -4,8 +4,8 @@ MKSHELL=rc
 CC=6c
 GC=6g
 LD=6l
-LDFLAGS=-L $NXM/$objtype/lib
 O=6
 AS=6a
-CFLAGS=-I $NXM/amd64/include -I $NXM/sys/include -I $NXM
+LDFLAGS=-L$NXM/$objtype/lib
+CFLAGS=-I $NXM/$objtype/include -I $NXM/sys/include -I $NXM
 MKSHELL=rc

+ 1 - 1
sys/include/ape/Plan9libnet.h

@@ -3,7 +3,7 @@
 #ifndef _NET_EXTENSION
    This header file is not defined in ANSI or POSIX
 #endif
-#pragma lib "/$M/lib/ape/libnet.a"
+#pragma lib "ape/libnet.a"
 
 #define NETPATHLEN 40
 

+ 1 - 1
sys/include/ape/assert.h

@@ -1,4 +1,4 @@
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #undef assert
 #ifdef NDEBUG

+ 1 - 1
sys/include/ape/bsd.h

@@ -5,7 +5,7 @@
 #ifndef __BSD_H_
 #define __BSD_H_
 #pragma src "/sys/src/ape/lib/bsd"
-#pragma lib "/$M/lib/ape/libbsd.a"
+#pragma lib "ape/libbsd.a"
 
 #ifndef __TYPES_H
 #include <sys/types.h>

+ 1 - 1
sys/include/ape/ctype.h

@@ -1,6 +1,6 @@
 #ifndef __CTYPE
 #define __CTYPE
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #ifdef __cplusplus
 extern "C" {

+ 1 - 1
sys/include/ape/cursor.h

@@ -4,7 +4,7 @@
 
 #ifndef	__CURSOR_H_
 #define	__CURSOR_H_
-#include "/sys/include/cursor.h"
+#include "../cursor.h"
 
 #endif
 

+ 1 - 1
sys/include/ape/dirent.h

@@ -1,6 +1,6 @@
 #ifndef	__DIRENT_H
 #define	__DIRENT_H
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 /*
  * this must be a power of 2 and a multiple of all the ones in the system
  */

+ 1 - 1
sys/include/ape/draw.h

@@ -5,7 +5,7 @@
 #ifndef	__DRAW_H_
 #define	__DRAW_H_
 #pragma src "/sys/src/ape/lib/draw"
-#pragma lib "/$M/lib/ape/libdraw.a"
+#pragma lib "ape/libdraw.a"
 
 #include <u.h>
 #include <fmt.h>

+ 1 - 1
sys/include/ape/errno.h

@@ -1,6 +1,6 @@
 #ifndef __ERRNO
 #define __ERRNO
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 extern int errno;
 

+ 1 - 1
sys/include/ape/error.h

@@ -3,7 +3,7 @@
 #ifndef _RESEARCH_SOURCE
    This header file is not defined in pure ANSI or POSIX
 #endif
-#pragma lib "/$M/lib/ape/libv.a"
+#pragma lib "ape/libv.a"
 
 #ifdef __cplusplus
 extern "C" {

+ 1 - 1
sys/include/ape/event.h

@@ -4,7 +4,7 @@
 
 #ifndef	__EVENT_H_
 #define	__EVENT_H_
-#include "/sys/include/event.h"
+#include "../event.h"
 
 #endif
 

+ 1 - 1
sys/include/ape/fcntl.h

@@ -3,7 +3,7 @@
 #ifndef _POSIX_SOURCE
    This header file is not defined in pure ANSI
 #endif
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #include	<sys/types.h>
 

+ 1 - 1
sys/include/ape/fmt.h

@@ -5,7 +5,7 @@
 #ifndef __FMT_H_
 #define __FMT_H_
 #pragma src "/sys/src/ape/lib/fmt"
-#pragma lib "/$M/lib/ape/libfmt.a"
+#pragma lib "ape/libfmt.a"
 
 #include <u.h>
 

+ 1 - 1
sys/include/ape/grp.h

@@ -3,7 +3,7 @@
 #ifndef _POSIX_SOURCE
    This header file is not defined in pure ANSI
 #endif
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 #include <sys/types.h>
 
 struct	group {

+ 1 - 1
sys/include/ape/lib9.h

@@ -3,7 +3,7 @@
 #if !defined(_RESEARCH_SOURCE) && !defined(_PLAN9_SOURCE)
     This header file is an extension to ANSI/POSIX
 #endif
-#pragma lib "/$M/lib/ape/lib9.a"
+#pragma lib "ape/lib9.a"
 
 #include <u.h>		/* ick; need Rune defined below */
 

+ 1 - 1
sys/include/ape/libl.h

@@ -3,7 +3,7 @@
 #ifndef _RESEARCH_SOURCE
    This header file is not defined in ANSI or POSIX
 #endif
-#pragma lib "/$M/lib/ape/libl.a"
+#pragma lib "ape/libl.a"
 
 extern int printable(int);
 extern void allprint(char);

+ 1 - 1
sys/include/ape/libnet.h

@@ -3,7 +3,7 @@
 #ifndef _NET_EXTENSION
    This header file is not defined in ANSI or POSIX
 #endif
-#pragma lib "/$M/lib/ape/libnet.a"
+#pragma lib "ape/libnet.a"
 
 #define NETPATHLEN 40
 

+ 1 - 1
sys/include/ape/libv.h

@@ -3,7 +3,7 @@
 #ifndef _RESEARCH_SOURCE
    This header file is not defined in ANSI or POSIX
 #endif
-#pragma lib "/$M/lib/ape/libv.a"
+#pragma lib "ape/libv.a"
 
 #ifdef __cplusplus
 extern "C" {

+ 1 - 1
sys/include/ape/locale.h

@@ -1,6 +1,6 @@
 #ifndef __LOCALE
 #define __LOCALE
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #include <stddef.h>
 

+ 1 - 1
sys/include/ape/lock.h

@@ -4,7 +4,7 @@
 
 #ifndef __LOCK_H
 #define __LOCK_H
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #include <u.h>
 

+ 1 - 1
sys/include/ape/netdb.h

@@ -5,7 +5,7 @@
     This header file is an extension to ANSI/POSIX
 #endif
 
-#pragma lib "/$M/lib/ape/libbsd.a"
+#pragma lib "ape/libbsd.a"
 
 /*-
  * Copyright (c) 1980, 1983, 1988 Regents of the University of California.

+ 1 - 1
sys/include/ape/pwd.h

@@ -3,7 +3,7 @@
 #ifndef _POSIX_SOURCE
    This header file is not defined in pure ANSI
 #endif
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 #include <sys/types.h>
 
 struct passwd {

+ 1 - 1
sys/include/ape/qlock.h

@@ -4,7 +4,7 @@
 
 #ifndef __QLOCK_H_
 #define __QLOCK_H_
-#pragma lib "/$M/lib/ape/lib9.a"
+#pragma lib "ape/lib9.a"
 
 #include <u.h>
 #include <lock.h>

+ 1 - 1
sys/include/ape/regexp.h

@@ -3,7 +3,7 @@
 #ifndef _REGEXP_EXTENSION
     This header file is an extension to ANSI/POSIX
 #endif
-#pragma lib "/$M/lib/ape/libregexp.a"
+#pragma lib "ape/libregexp.a"
 
 #ifdef	UTF
 #define	Runeself	0xA0

+ 1 - 1
sys/include/ape/select.h

@@ -3,7 +3,7 @@
 #ifndef _BSD_EXTENSION
     This header file is an extension to ANSI/POSIX
 #endif
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #ifndef _FD_SET_T
 #define _FD_SET_T

+ 1 - 1
sys/include/ape/setjmp.h

@@ -1,6 +1,6 @@
 #ifndef __SETJMP_H
 #define __SETJMP_H
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 typedef int jmp_buf[10];
 #ifdef _POSIX_SOURCE

+ 1 - 1
sys/include/ape/signal.h

@@ -1,6 +1,6 @@
 #ifndef __SIGNAL_H
 #define __SIGNAL_H
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 typedef int sig_atomic_t;
 

+ 2 - 2
sys/include/ape/stdio.h

@@ -1,6 +1,6 @@
 #ifndef	_STDIO_H_
 #define	_STDIO_H_
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 /*
  * pANS stdio.h
@@ -145,7 +145,7 @@ extern char *ctermid_r(char *);
 #endif
 
 #ifdef _BSD_EXTENSION
-#pragma lib "/$M/lib/ape/libbsd.a"
+#pragma lib "ape/libbsd.a"
 extern FILE *popen(char *, char *);
 extern int	pclose(FILE *);
 #endif

+ 1 - 1
sys/include/ape/stdlib.h

@@ -1,6 +1,6 @@
 #ifndef __STDLIB_H
 #define __STDLIB_H
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #include <stddef.h>
 

+ 1 - 1
sys/include/ape/string.h

@@ -1,6 +1,6 @@
 #ifndef __STRING_H_
 #define __STRING_H_
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #include <stddef.h>
 

+ 1 - 1
sys/include/ape/sys/pty.h

@@ -8,7 +8,7 @@
     This header file is an extension to ANSI/POSIX
 #endif
 
-#pragma lib "/$M/lib/ape/libbsd.a"
+#pragma lib "ape/libbsd.a"
 
 char*	ptsname(int);
 char*	ptmname(int);

+ 1 - 1
sys/include/ape/sys/select.h

@@ -3,7 +3,7 @@
 #ifndef _BSD_EXTENSION
     This header file is an extension to ANSI/POSIX
 #endif
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #ifndef _FD_SET_T
 #define _FD_SET_T

+ 1 - 1
sys/include/ape/sys/socket.h

@@ -5,7 +5,7 @@
     This header file is an extension to ANSI/POSIX
 #endif
 
-#pragma lib "/$M/lib/ape/libbsd.a"
+#pragma lib "ape/libbsd.a"
 
 #ifdef __cplusplus
 extern "C" {

+ 2 - 2
sys/include/ape/sys/stat.h

@@ -5,7 +5,7 @@
 #include <sys/types.h>
 #endif
 
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 /*
  * stat structure, used by stat(2) and fstat(2)
@@ -71,7 +71,7 @@ extern int fstat(int, struct stat *);
 extern int chmod(const char *, mode_t);
 
 #ifdef _BSD_EXTENSION
-#pragma lib "/$M/lib/ape/libbsd.a"
+#pragma lib "ape/libbsd.a"
 extern int	lstat(char *, struct stat *);
 extern int	symlink(char *, char *);
 extern int	readlink(char *, char*, int);

+ 1 - 1
sys/include/ape/sys/time.h

@@ -1,6 +1,6 @@
 #ifndef __SYSTIME_H
 #define __SYSTIME_H
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #ifndef __TIMEVAL__
 #define __TIMEVAL__

+ 1 - 1
sys/include/ape/sys/times.h

@@ -1,6 +1,6 @@
 #ifndef __TIMES_H
 #define __TIMES_H
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #ifndef _CLOCK_T
 #define _CLOCK_T

+ 1 - 1
sys/include/ape/sys/types.h

@@ -1,7 +1,7 @@
 #ifndef __TYPES_H
 #define __TYPES_H
 
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 typedef	unsigned short	ino_t;
 typedef	unsigned short	dev_t;
 typedef	long long		off_t;

+ 1 - 1
sys/include/ape/sys/uio.h

@@ -9,7 +9,7 @@
 extern "C" {
 #endif
 
-#pragma lib "/$M/lib/ape/libbsd.a"
+#pragma lib "ape/libbsd.a"
 
 /*
  * Copyright (c) 1982, 1986 Regents of the University of California.

+ 1 - 1
sys/include/ape/sys/utsname.h

@@ -1,6 +1,6 @@
 #ifndef __UTSNAME
 #define __UTSNAME
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 struct utsname {
 	char	*sysname;

+ 1 - 1
sys/include/ape/sys/wait.h

@@ -1,6 +1,6 @@
 #ifndef __WAIT_H
 #define __WAIT_H
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 /* flag bits for third argument of waitpid */
 #define WNOHANG		0x1

+ 1 - 1
sys/include/ape/termios.h

@@ -1,4 +1,4 @@
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 /* input modes */
 #define BRKINT	0x001
 #define ICRNL	0x002

+ 1 - 1
sys/include/ape/time.h

@@ -1,6 +1,6 @@
 #ifndef __TIME_H
 #define __TIME_H
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #include <stddef.h>
 

+ 1 - 1
sys/include/ape/unistd.h

@@ -3,7 +3,7 @@
 #ifndef _POSIX_SOURCE
    This header file is not defined in pure ANSI
 #endif
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 #define _POSIX_VERSION	199309L
 #define _POSIX_ASYNC_IO -1

+ 2 - 2
sys/include/ape/utf.h

@@ -1,13 +1,13 @@
 #ifndef _UTF_H_
 #define _UTF_H_ 1
-#pragma lib "/$M/lib/ape/libutf.a"
+#pragma lib "ape/libutf.a"
 #pragma src "/sys/src/ape/lib/utf"
 
 #if defined(__cplusplus)
 extern "C" { 
 #endif
 
-typedef unsigned int Rune;	/* 32 bits */
+typedef unsigned short Rune;	/* 16 bits */
 
 enum
 {

+ 1 - 1
sys/include/ape/utime.h

@@ -1,7 +1,7 @@
 #ifndef __UTIME_H
 #define __UTIME_H
 
-#pragma lib "/$M/lib/ape/libap.a"
+#pragma lib "ape/libap.a"
 
 struct utimbuf
 {

+ 10 - 0
sys/src/ape/9src/mkfile

@@ -0,0 +1,10 @@
+MKSHELL=rc
+<$NXM/$objtype/mkfile
+
+#	ptyfs\	# for X11, not going to bother
+TARG=\
+	stty\
+	tar\
+
+BIN=$NXM/$objtype/bin/ape
+<$NXM/sys/src/cmd/mkmany

+ 266 - 0
sys/src/ape/9src/stty.c

@@ -0,0 +1,266 @@
+#include <u.h>
+#include <libc.h>
+#include <tty.h>
+
+/*
+ * One entry of a keyword lookup table.  Each table below ends with an
+ * entry whose name is 0.
+ */
+typedef struct Mode Mode;
+struct Mode
+{
+	char*	name;	/* keyword as given on the command line */
+	int	bit;	/* flag mask (ou, in, lo) or control-character index (cc) */
+};
+
+/* output mode keywords; main applies these masks to Termios.oflag */
+Mode ou[] =
+{
+	"opost",	OPOST,
+	"olcuc",	OLCUC,
+	"onlcr",	ONLCR,
+	"ocrnl",	OCRNL,
+	"onocr",	ONOCR,
+	"onlret",	ONLRET,
+	"ofill",	OFILL,
+	"ofdel",	OFDEL,
+	0
+};
+
+/* input mode keywords; main applies these masks to Termios.iflag */
+Mode in[] =
+{
+	"brkint",	BRKINT,
+	"icrnl",	ICRNL,
+	"ignbrk",	IGNBRK,
+	"igncr",	IGNCR,
+	"ignpar",	IGNPAR,
+	"inlcr",	INLCR,
+	"inpck",	INPCK,
+	"istrip",	ISTRIP,
+	"ixoff",	IXOFF,
+	"ixon",		IXON,
+	"parmrk",	PARMRK,
+	0
+};
+
+/* local mode keywords; main applies these masks to Termios.lflag */
+Mode lo[] =
+{
+	"echo",		ECHO,
+	"echoe",	ECHOE,
+	"echok", 	ECHOK,
+	"echonl",	ECHONL,
+	"icanon",	ICANON,
+	"iexten",	IEXTEN,
+	"isig",		ISIG,
+	"noflsh",	NOFLSH,
+	"tostop",	TOSTOP,
+	0
+};
+
+/*
+ * Control-character keywords; bit holds the V* index into Termios.cc.
+ * showmode and ccname use an entry's position in this table as the
+ * Termios.cc index, so the entries must stay in V* value order
+ * (VEOF == 0 ... VSTOP == 10).
+ */
+Mode cc[] =
+{
+	"eof",		VEOF,
+	"eol",		VEOL,
+	"erase",	VERASE,
+	"intr",		VINTR,
+	"kill",		VKILL,
+	"min",		VMIN,
+	"quit",		VQUIT,
+	"susp",		VSUSP,
+	"time",		VTIME,
+	"start",	VSTART,
+	"stop",		VSTOP,
+	0,
+};
+
+/* defined at the end of this file; both return 0 on success, -1 on failure */
+int	getmode(int, Termios*);
+int	setmode(int, Termios*);
+
+/*
+ * Return a printable name for control character c: "DEL" for 0x7f,
+ * "NUL" for 0, "^X" notation for other values below 32, and the
+ * character itself otherwise.  Apart from the two literals, the result
+ * lives in a static buffer that the next call overwrites.
+ */
+char*
+ctlchar(char c)
+{
+	static char buf[10];
+	unsigned char ch;
+
+	/*
+	 * Compare as unsigned: where plain char is signed, bytes >= 0x80
+	 * are negative and would otherwise be taken for control characters.
+	 */
+	ch = c;
+	if(ch == 0x7f)
+		return "DEL";
+	if(ch == 0)
+		return "NUL";
+	if(ch < 32) {
+		buf[0] = '^';
+		buf[1] = '@'+ch;
+		buf[2] = '\0';
+		return buf;
+	}
+	buf[0] = c;
+	buf[1] = '\0';
+	return buf;
+}
+
+/*
+ * Print the settings in *t on standard output: one line of control
+ * characters, one line of the output and input flags that are set,
+ * and one line of the local flags that are set.
+ */
+void
+showmode(Termios *t)
+{
+	int i;
+
+	/* t->cc is indexed by table position i, which equals cc[i].bit */
+	for(i = 0; cc[i].name; i++) {
+		switch(cc[i].bit) {
+		case VMIN:
+		case VTIME:
+			/* numeric settings: shown as a number, and only when non-zero */
+			if(t->cc[i] != 0)
+				print("%s %d ", cc[i].name, t->cc[i]);
+			break;
+		default:
+			print("%s %s ", cc[i].name, ctlchar(t->cc[i]));
+			break;
+		}
+	}
+	print("\n");
+
+	/* output and input flags share one line */
+	for(i = 0; ou[i].name; i++)
+		if(ou[i].bit & t->oflag)
+			print("%s ", ou[i].name);
+
+	for(i = 0; in[i].name; i++)
+		if(in[i].bit & t->iflag)
+			print("%s ", in[i].name);
+
+	print("\n");
+	for(i = 0; lo[i].name; i++)
+		if(lo[i].bit & t->lflag)
+			print("%s ", lo[i].name);
+	print("\n");
+}
+
+/*
+ * Look mode up in the 0-terminated table t.  A leading '-' on mode
+ * means clear: the matching entry's bit is removed from *bits; without
+ * it the bit is added.  Returns 1 if mode was found, 0 otherwise (in
+ * which case *bits is left alone).
+ */
+int
+setreset(char *mode, int *bits, Mode *t)
+{
+	Mode *m;
+	int clear;
+
+	clear = (mode[0] == '-');
+	if(clear)
+		mode++;
+
+	for(m = t; m->name != 0; m++) {
+		if(strcmp(mode, m->name) != 0)
+			continue;
+		*bits = clear ? (*bits & ~m->bit) : (*bits | m->bit);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Return the position of name in the cc table, or -1 if it is not a
+ * control-character keyword.
+ */
+int
+ccname(char *name)
+{
+	int idx;
+
+	idx = 0;
+	while(cc[idx].name != 0) {
+		if(strcmp(cc[idx].name, name) == 0)
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
+/*
+ * stty: read the current modes, apply each argument in order, and
+ * write the modes back if any argument changed them.
+ *
+ * Arguments handled: "-a" (print the modes), a keyword from the in, lo
+ * or ou tables with an optional leading '-' to clear it, or a keyword
+ * from the cc table followed by an argument whose first byte becomes
+ * that control character.  Anything else is an error.
+ * NOTE(review): the usage message mentions -g, but no -g handling is
+ * visible in this function.
+ */
+void
+main(int argc, char **argv)
+{
+	Termios t;
+	int i, stdin, wmo, cc;	/* this cc (an index) shadows the global cc table */
+
+	/* Try and get a seek pointer */
+	stdin = open("/fd/0", ORDWR);
+	if(stdin < 0)
+		stdin = 0;
+
+	if(getmode(stdin, &t) < 0) {
+		fprint(2, "stty: tiocget %r\n");
+		exits("1");
+	}
+
+	if(argc < 2) {
+		fprint(2, "usage: stty [-a|-g] modes...\n");
+		exits("1");
+	}
+	wmo = 0;	/* number of arguments that modified t */
+	for(i = 1; i < argc; i++) {
+		if(strcmp(argv[i], "-a") == 0) {
+			showmode(&t);
+			continue;
+		}
+		/* flag keywords are tried against the input, local, then output tables */
+		if(setreset(argv[i], &t.iflag, in)) {
+			wmo++;
+			continue;
+		}
+		if(setreset(argv[i], &t.lflag, lo)) {
+			wmo++;
+			continue;
+		}
+		if(setreset(argv[i], &t.oflag, ou)) {
+			wmo++;
+			continue;
+		}
+		/* control-character keyword: needs a following value argument */
+		cc = ccname(argv[i]);
+		if(cc != -1 && i+1 < argc) {
+			wmo++;
+			t.cc[cc] = argv[++i][0];
+			continue;
+		}
+		fprint(2, "stty: bad option/mode %s\n", argv[i]);
+		exits("1");
+	}
+
+	/* only write back when something changed */
+	if(wmo) {
+		if(setmode(stdin, &t) < 0) {
+			fprint(2, "stty: cant set mode %r\n");
+			exits("1");
+		}
+	}
+
+	exits(0);
+}
+
+/*
+ * Write the modes in *t to fd.
+ * Format is: IOW iiii oooo cccc llll xx xx xx xx ...
+ * (four 4-digit hex flag words, then NCCS 2-digit hex control
+ * characters, each field followed by a space).
+ * Returns 0 on success, -1 if the seek or the write fails.
+ */
+int
+setmode(int fd, Termios *t)
+{
+	int n, i;
+	char buf[256];
+
+	n = sprint(buf, "IOW %4.4ux %4.4ux %4.4ux %4.4ux ",
+		t->iflag, t->oflag, t->cflag, t->lflag);
+	for(i = 0; i < NCCS; i++)
+		n += sprint(buf+n, "%2.2ux ", t->cc[i]);
+
+	/* NOTE(review): offset -2 presumably selects the tty control channel -- confirm */
+	if(seek(fd, -2, 0) != -2)
+		return -1;
+
+	n = write(fd, buf, n);
+	if(n < 0)
+		return -1;
+	return 0;
+}
+
+/*
+ * Read the modes of fd into *t.
+ * Format is: IOR iiii oooo cccc llll xx xx xx xx ...
+ * (a 4-byte tag, four 4-digit hex flag words, then NCCS 2-digit hex
+ * control characters, each field followed by a space: 57 bytes when
+ * NCCS is 11).
+ * Returns 0 on success, -1 if the seek or the read fails.  Fields
+ * missing from a short reply are read as 0.
+ */
+int
+getmode(int fd, Termios *t)
+{
+	int n;
+	char buf[256];
+
+	if(seek(fd, -2, 0) != -2)
+		return -1;
+
+	/*
+	 * Clear the buffer first: read does not NUL-terminate, and a short
+	 * reply would otherwise leave strtoul parsing uninitialised bytes.
+	 */
+	memset(buf, 0, sizeof buf);
+	n = read(fd, buf, 57);
+	if(n < 0)
+		return -1;
+
+	/* offsets follow the fixed-width layout described above */
+	t->iflag = strtoul(buf+4, 0, 16);
+	t->oflag = strtoul(buf+9, 0, 16);
+	t->cflag = strtoul(buf+14, 0, 16);
+	t->lflag = strtoul(buf+19, 0, 16);
+
+	for(n = 0; n < NCCS; n++)
+		t->cc[n] = strtoul(buf+24+(n*3), 0, 16);
+
+	return 0;
+}

+ 127 - 0
sys/src/ape/9src/tar.c

@@ -0,0 +1,127 @@
+/*
+ * Attempt at emulation of Unix tar by calling Plan 9 tar.
+ * 
+ * The differences from Plan 9 tar are:
+ *	In the absence of an "f" flag, the file /dev/tape is used.
+ *	An "f" flag with argument "-" causes use of stdin/stdout
+ *		by passing no "f" flag (nor argument) to Plan 9 tar.
+ *	By default, the "T" flag is passed to Plan 9 tar.
+ *		The "m" flag to this tar inhibits this behavior.
+ */
+
+#include <u.h>
+#include <libc.h>
+
+/* Print the usage message on file descriptor 2 and exit with status "usage". */
+void
+usage(void)
+{
+	fprint(2, "usage: ape/tar [crtx][vfm] [args...] [file...]\n");
+	exits("usage");
+}
+
+/*
+ * Translate a Unix-style tar command line into a Plan 9 tar command
+ * line, run /$cputype/bin/tar with it in a child process, and exit
+ * with the child's wait message (nil when that message is empty).
+ *
+ * argv[1] is the key: an optional '-', one verb from "crtx", then any
+ * of the modifiers v, f, m and p.  Each 'f' consumes the next argument
+ * as the archive name.
+ */
+void
+main(int argc, char **argv)
+{
+	int i, j, verb, vflag, fflag, Tflag, nargc;
+	char *p, *file, **nargv, *cpu, flagbuf[10], execbuf[128];
+	Waitmsg *w;
+
+	argv++, argc--;
+	if(argc < 1)
+		usage();
+
+	p = argv[0];
+	argv++, argc--;
+
+	if(*p == '-')
+		p++;
+
+	/*
+	 * strchr also matches the terminating NUL, so reject an empty key
+	 * (e.g. "-") explicitly; otherwise p would step past the end of it.
+	 */
+	if(*p == '\0' || strchr("crtx", *p) == nil)
+		usage();
+	verb = *p++;
+
+	/* unix defaults */
+	fflag = 1;
+	file = "/dev/tape";
+	Tflag = 1;
+	vflag = 0;
+
+	for(; *p; p++) {
+		switch(*p) {
+		default:
+			usage();
+		case 'v':
+			vflag = 1;
+			break;
+		case 'f':
+			if(argc <= 0)
+				usage();
+
+			fflag = 1;
+			file = argv[0];
+			argv++, argc--;
+			if(strcmp(file, "-") == 0) {
+				/*
+				 * plan9 doesn't know about "-" meaning stdin/stdout,
+				 * but it's the default,
+				 * so rewrite to not use f flag at all.
+				 */
+				file = nil;
+				fflag = 0;
+			}
+			break;
+		case 'm':
+			Tflag = 0;
+			break;
+		case 'p':		/* pretend nothing's wrong */
+			break;
+		}
+	}
+
+	/* "tar" + flag string + optional archive name + remaining args + nil */
+	nargc = 1 + 1 + fflag + argc + 1;
+	nargv = malloc(sizeof(char*) * nargc);
+	if(nargv == nil) {
+		fprint(2, "ape/tar: out of memory\n");
+		exits("memory");
+	}
+
+	cpu = getenv("cputype");
+	if(cpu == nil) {
+		fprint(2, "ape/tar: need cputype environment variable set\n");
+		exits("cputype");
+	}
+	snprint(execbuf, sizeof execbuf, "/%s/bin/tar", cpu);
+
+	nargv[0] = "tar";
+	/* at most four flag characters plus the NUL, so flagbuf is large enough */
+	sprint(flagbuf, "%c%s%s%s", verb, vflag ? "v" : "", Tflag ? "T" : "", fflag ? "f" : "");
+	nargv[1] = flagbuf;
+
+	i = 2;
+	if(fflag)
+		nargv[i++] = file;
+
+	for(j=0; j<argc; j++, i++)
+		nargv[i] = argv[j];
+
+	nargv[i++] = nil;
+	assert(i == nargc);
+
+	switch(fork()){
+	case -1:
+		fprint(2, "ape/tar: fork failed: %r\n");
+		exits("fork");
+	case 0:
+		exec(execbuf, nargv);
+		fprint(2, "exec %s fails: %r\n", execbuf);
+		_exits("exec");
+	default:
+		/* pass the child's exit status on as our own */
+		w = wait();
+		if(w == nil)
+			exits("wait failed");
+		if(w->msg[0] == '\0')
+			exits(nil);
+		else
+			exits(w->msg);
+	}
+	assert(0);
+}

+ 127 - 0
sys/src/ape/9src/tty.h

@@ -0,0 +1,127 @@
+/* input modes */
+#define BRKINT	0x001
+#define ICRNL	0x002
+#define IGNBRK	0x004
+#define IGNCR	0x008
+#define IGNPAR	0x010
+#define INLCR	0x020
+#define INPCK	0x040
+#define ISTRIP	0x080
+#define IXOFF	0x100
+#define IXON	0x200
+#define PARMRK	0x400
+
+/* output modes */
+#define	OPOST	0000001
+#define	OLCUC	0000002
+#define	ONLCR	0000004
+#define	OCRNL	0000010
+#define	ONOCR	0000020
+#define	ONLRET	0000040
+#define	OFILL	0000100
+#define	OFDEL	0000200
+#define	NLDLY	0000400
+#define	NL0	0
+#define	NL1	0000400
+#define	CRDLY	0003000
+#define	CR0	0
+#define	CR1	0001000
+#define	CR2	0002000
+#define	CR3	0003000
+#define	TABDLY	0014000
+#define	TAB0	0
+#define	TAB1	0004000
+#define	TAB2	0010000
+#define	TAB3	0014000
+#define	BSDLY	0020000
+#define	BS0	0
+#define	BS1	0020000
+#define	VTDLY	0040000
+#define	VT0	0
+#define	VT1	0040000
+#define	FFDLY	0100000
+#define	FF0	0
+#define	FF1	0100000
+
+/* control modes */
+#define CLOCAL	0x001
+#define CREAD	0x002
+#define CSIZE	0x01C
+#define CS5	0x004
+#define CS6	0x008
+#define CS7	0x00C
+#define CS8	0x010
+#define CSTOPB	0x020
+#define HUPCL	0x040
+#define PARENB	0x080
+#define PARODD	0x100
+
+/* local modes */
+#define ECHO	0x001
+#define ECHOE	0x002
+#define ECHOK	0x004
+#define ECHONL	0x008
+#define ICANON	0x010
+#define IEXTEN	0x020
+#define ISIG	0x040
+#define NOFLSH	0x080
+#define TOSTOP	0x100
+
+/* control characters */
+#define VEOF	0
+#define VEOL	1
+#define VERASE	2
+#define VINTR	3
+#define VKILL	4
+#define VMIN	5
+#define VQUIT	6
+#define VSUSP	7
+#define VTIME	8
+#define VSTART	9
+#define VSTOP	10
+#define NCCS	11
+
+/* baud rates */
+#define B0	0
+#define B50	1
+#define B75	2
+#define B110	3
+#define B134	4
+#define B150	5
+#define B200	6
+#define B300	7
+#define B600	8
+#define B1200	9
+#define B1800	10
+#define B2400	11
+#define B4800	12
+#define B9600	13
+#define B19200	14
+#define B38400	15
+
+#define	CESC	'\\'
+#define	CINTR	0177	/* DEL */
+#define	CQUIT	034	/* FS, cntl | */
+#define	CERASE	010	/* BS */
+#define	CKILL	025	/* cntl u */
+#define	CEOF	04	/* cntl d */
+#define	CSTART	021	/* cntl q */
+#define	CSTOP	023	/* cntl s */
+#define	CSWTCH	032	/* cntl z */
+#define CEOL	000
+#define	CNSWTCH	0
+
+/* optional actions for tcsetattr */
+#define TCSANOW	  1
+#define TCSADRAIN 2
+#define TCSAFLUSH 3
+
+/*
+ * Terminal mode settings: four flag words built from the mode
+ * constants above, plus the control characters, indexed by the V*
+ * constants (NCCS entries).
+ */
+typedef struct Termios Termios;
+struct Termios
+{
+	int	iflag;		/* input modes */
+	int	oflag;		/* output modes */
+	int	cflag;		/* control modes */
+	int	lflag;		/* local modes */
+	uchar	cc[NCCS];	/* control characters */
+};

+ 189 - 0
sys/src/ape/cmd/README

@@ -0,0 +1,189 @@
+This is an attempt to make the utilities specified in
+POSIX 1003.2 available, assuming /$objtype/ape/bin
+and /lib/rc/ape are bound to /bin before the regular
+bin directories.
+
+Here's a brief description of the status of these commands.
+
+EXECUTION ENVIRONMENT UTILITIES
+
+awk	Plan 9 awk.
+	system() uses rc instead of sh.
+
+basename POSIX conforming
+
+bc	Plan 9 bc.
+
+cat	Plan 9 cat.
+	no -u option (for byte-at-a-time)
+
+cd	shell builtins
+	doesn't use $HOME or $CDPATH
+
+chgrp	Plan 9 chgrp.
+	no -R option (for recursive chgrp).
+	only takes name, not number
+
+
+chmod	Plan 9 chmod.
+	no -R option (for recursive chmod).
+	no s (setuid) and X (conditional x) perms.
+	nonstandard a,l perms.
+
+chown	Always prints 'Permission denied' and fails.
+
+cksum	not implemented
+
+cmp	Plan 9 cmp.
+	nonstandard -L option
+	no line number printed; hex instead of octal for bytes
+
+comm	Plan 9 comm.
+
+command	not implemented
+
+cp	Plan 9 cp.
+	no -R and -r (recursive), -i (interactive), -p (preserve) options
+	nonstandard -z option
+
+cut	not implemented
+
+date	Plan 9 date.
+	no format option
+	nonstandard -n option
+
+dd	Plan 9 dd.
+
+diff	Plan 9 diff.
+	can't have both files directories
+	no -r (recursive) option
+	-c<n> instead of -c and -C <n> for context
+
+dirname	POSIX conforming
+
+echo	Plan 9 echo
+
+ed	Plan 9 ed
+	nonstandard b,wq commands
+
+env	not implemented
+
+expr	V10 expr (seems to be like POSIX)
+
+false	POSIX conforming
+
+find	not implemented
+
+fold	not implemented
+
+getconf	not implemented
+
+getopts	not implemented
+
+grep	script calling Plan 9 grep -G
+	s means q, should mean forget nonexistent files
+	nonstandard 1,b,L,q options
+
+head	not implemented
+
+id	not implemented
+
+join	not implemented
+
+kill	V10 kill
+	no -s signalname, no -l arg
+
+ln	not implemented
+
+locale	not implemented
+
+localedef not implemented
+
+logger	not implemented
+
+logname	not implemented
+
+lp	Plan 9 lp
+
+ls	Plan 9 ls
+
+mailx	not implemented
+
+mkdir	Plan 9 mkdir
+
+mkfifo	not implemented
+
+mv	Plan 9 mv
+
+nohup	not implemented
+
+od	not implemented
+
+paste	not implemented
+
+pathchk	not implemented
+
+pax	implemented
+
+pr	Plan 9 pr
+
+printf	not implemented
+
+pwd	Plan 9 pwd
+
+read	shell builtin
+
+rm	Plan 9 rm
+
+rmdir	script
+	no -p option
+
+sed	v10 sed
+
+sh	ksh93 -- POSIX compliant
+
+sleep	Plan 9 sleep
+
+sort	Plan 9 sort
+
+stty	POSIX compliant (sort of)
+
+tail	Plan 9 tail
+
+tee	Plan 9 tee
+
+test	Plan 9 test (POSIX compliant); copied as [
+
+touch	Plan 9 touch
+
+tr	Plan 9 tr
+
+true	POSIX compliant
+
+umask	noop
+
+SOFTWARE DEVELOPMENT UTILITIES (OPTIONAL)
+
+ar	script to call Plan9 ar, after arg conversion
+
+make	V10 make
+
+strip	not implemented
+
+C LANGUAGE DEVELOPMENT UTILITIES OPTION
+
+c89	script to APE environment cc (also available as cc)
+
+lex	Plan 9 lex
+
+yacc	script to Plan 9 yacc
+
+General Bugs:
+The environment variables LANG, LC_ALL,
+LC_CTYPE, and LC_MESSAGES are ignored.
+
+The use of -- as an argument to stop option processing
+is generally not done.
+
+The many 'not implemented' functions will be implemented
+as scripts using them show up.

+ 45 - 0
sys/src/ape/cmd/basename.c

@@ -0,0 +1,45 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * basename string [suffix]
+ *
+ * Print the last pathname component of string followed by a newline.
+ * Trailing slashes are ignored; a string made only of slashes prints
+ * "/".  If suffix is given, is strictly shorter than the component and
+ * matches its end, it is removed first.  The string is edited in place.
+ * Exits 1 on a usage error, 0 otherwise.
+ */
+void
+main(int argc, char **argv)
+{
+	char *f, *b, *s;
+	int n;
+
+	if(argc < 2 || argc > 3){
+		fprintf(stderr, "Usage: basename string [suffix]\n");
+		exit(1);
+	}
+	s = argv[1];
+
+	/*
+	 * Drop trailing slashes but keep at least one character.  b always
+	 * stays within [s, s+strlen(s)], so an empty string is safe too.
+	 */
+	b = s + strlen(s);
+	while(b > s+1 && b[-1] == '/')
+		b--;
+	*b = 0;
+	if(b == s+1 && s[0] == '/') {
+		/* the string was all slashes; terminate the line like every other result */
+		printf("/\n");
+		exit(0);
+	}
+	/* now b is after last char of string, trailing slashes removed */
+
+	for(f = b; f > s && f[-1] != '/'; f--)
+		;
+
+	/* now f is first char after last remaining slash, or first char */
+
+	if(argc == 3){
+		n = strlen(argv[2]);
+		/* n < b-f: a suffix equal to the whole name is not removed */
+		if(n < b-f && strncmp(b-n, argv[2], n) == 0){
+			b -= n;
+			*b = 0;
+		}
+	}
+	printf("%s\n", f);
+	exit(0);
+}

+ 387 - 0
sys/src/ape/cmd/cc.c

@@ -0,0 +1,387 @@
+#include <u.h>
+#include <libc.h>
+
+/*
+   POSIX standard c89
+
+   standard options: -c, -D name[=val], -E (preprocess to stdout),
+       -g, -L dir, -o outfile, -O, -s, -U name
+       (and operands can have -l lib interspersed)
+   
+    nonstandard but specified options: -S (assembly language left in .s),
+       -Wx,arg1[,arg2...] (pass arg(s) to phase x, where x is p (cpp),
+   			 0 (compiler), or l (loader))
+    nonstandard options: -v (echo real commands to stdout as they execute)
+	-A: turn on ANSI prototype warnings
+ */
+
+typedef struct Objtype {	/* tool names for one target architecture */
+	char	*name;		/* value of $objtype that selects this entry */
+	char	*cc;		/* compiler command name, run as /bin/<cc> */
+	char	*ld;		/* loader command name, run as /bin/<ld> */
+	char	*o;		/* object file suffix for this architecture */
+} Objtype;
+
+Objtype objtype[] = {	/* known architectures; findoty() matches $objtype against the first column */
+	{"68020",	"2c", "2l", "2"},
+	{"arm",		"5c", "5l", "5"},
+	{"amd64",	"6c", "6l", "6"},
+	{"alpha",	"7c", "7l", "7"},
+	{"386",		"8c", "8l", "8"},
+	{"sparc",	"kc", "kl", "k"},
+	{"power",	"qc", "ql", "q"},
+	{"mips",	"vc", "vl", "v"},
+};
+
+enum {
+	Nobjs = (sizeof objtype)/(sizeof objtype[0]),	/* number of entries in objtype[] */
+	Maxlist = 2000,		/* slots in a List; append() keeps one free for the terminating 0 */
+};
+
+typedef struct List {	/* fixed-capacity string vector filled by append() */
+	char	*strings[Maxlist];	/* entries; append() stores 0 after the last one so it can serve as an exec argument vector */
+	int	n;		/* number of entries in use */
+} List;
+
+/* file lists and per-tool argument vectors built while parsing the command line */
+List	srcs, objs, cpp, cc, ld, ldargs, srchlibs;
+/* set by the -c, -v, -E, -S and -A options respectively */
+int	cflag, vflag, Eflag, Sflag, Aflag;
+/*
+ * First letters of the object suffixes of every architecture in objtype[];
+ * main() uses it to warn about operands built for a different architecture.
+ * '5' (arm) was missing although objtype[] lists it.
+ */
+char	*allos = "25678kqv";
+
+void	append(List *, char *);			/* add one string to a List, fatal when full */
+char	*changeext(char *, char *);		/* basename of a path with its extension replaced */
+void	doexec(char *, List *);			/* run one command and wait for it */
+void	dopipe(char *, List *, char *, List *);	/* run first command piped into second and wait for both */
+void	fatal(char *);				/* print "cc: msg" on fd 2 and exit */
+Objtype	*findoty(void);				/* objtype[] entry named by $objtype */
+void	printlist(List *);			/* print a List on fd 2, space separated */
+char *searchlib(char *, char*);			/* path of the library named by a -l operand, or 0 */
+
+/*
+ * Driver entry point: parse the c89-style options and operands, run cpp
+ * piped into the compiler for every .c operand (or cpp alone for -E), then,
+ * unless -c or -E was given, run the loader over the collected objects and
+ * libraries.
+ */
+void
+main(int argc, char *argv[])
+{
+	char *s, *suf, *ccpath, *lib;
+	char *oname;
+	int haveoname = 0;
+	int i, cppn, ccn;
+	Objtype *ot;
+
+	ot = findoty();
+	oname = "a.out";
+	append(&cpp, "cpp");
+	append(&cpp, "-D__STDC__=1");	/* ANSI says so */
+	append(&cpp, "-D_POSIX_SOURCE=");
+	append(&cpp, "-N");		/* turn off standard includes */
+	append(&cc, ot->cc);
+	append(&ld, ot->ld);
+	append(&srchlibs, smprint("/%s/lib/ape", ot->name));
+	/*
+	 * Options and operands may be interspersed, so option parsing is
+	 * restarted after every operand.  NOTE(review): this relies on
+	 * ARGBEGIN stepping over argv[0] (the program name, or the operand
+	 * handled on the previous pass) -- confirm against libc.h.
+	 */
+	while(argc > 0) {
+		ARGBEGIN {
+		case 'c':
+			cflag = 1;
+			break;
+		case 'l':
+			lib = searchlib(ARGF(), ot->name);
+			if(!lib)
+				fprint(2, "cc: can't find library for -l\n");
+			else
+				append(&objs, lib);
+			break;
+		case 'o':
+			oname = ARGF();
+			haveoname = 1;
+			if(!oname)
+				fatal("cc: no -o argument");
+			break;
+		case 'D':
+		case 'I':
+		case 'U':
+			append(&cpp, smprint("-%c%s", ARGC(), ARGF()));
+			break;
+		case 'E':
+			Eflag = 1;
+			cflag = 1;
+			break;
+		case 's':
+		case 'g':
+			break;
+		case 'L':
+			lib = ARGF();
+			if(!lib)
+				fprint(2, "cc: no -L argument\n");
+			else
+				append(&srchlibs, lib);
+			break;
+		case 'N':
+		case 'T':
+		case 'w':
+			append(&cc, smprint("-%c", ARGC()));
+			break;
+		case 'O':
+			break;
+		case 'W':
+			s = ARGF();
+			if(s && s[1]==',') {
+				switch (s[0]) {
+				case 'p':
+					append(&cpp, s+2);
+					break;
+				case '0':
+					append(&cc, s+2);
+					break;
+				case 'l':
+					append(&ldargs, s+2);
+					break;
+				default:
+					fprint(2, "cc: pass letter after -W should be one of p0l; ignored\n");
+				}
+			} else
+				fprint(2, "cc: bad option after -W; ignored\n");
+			break;
+		case 'v':
+			vflag = 1;
+			append(&ldargs, "-v");
+			break;
+		case 'A':
+			Aflag = 1;
+			break;
+		case 'S':
+			Sflag = 1;
+			break;
+		default:
+			fprint(2, "cc: flag -%c ignored\n", ARGC());
+			break;
+		} ARGEND
+		if(argc > 0) {
+			/* classify the operand by its extension */
+			s = argv[0];
+			suf = utfrrune(s, '.');
+			if(suf) {
+				suf++;
+				if(strcmp(suf, "c") == 0) {
+					append(&srcs, s);
+					append(&objs, changeext(s, "o"));
+				} else if(strcmp(suf, "o") == 0 ||
+					  strcmp(suf, ot->o) == 0 ||
+					  strcmp(suf, "a") == 0 ||
+					  (suf[0] == 'a' && strcmp(suf+1, ot->o) == 0)) {
+					append(&objs, s);
+				} else if(utfrune(allos, suf[0]) != 0) {
+					fprint(2, "cc: argument %s ignored: wrong architecture\n",
+						s);
+				}
+			}
+		}
+	}
+	/*
+	 * Decided once, after every option has been seen: previously this
+	 * ran on each pass of the loop above, so the flags were duplicated
+	 * per operand and were still added when -A followed the first operand.
+	 */
+	if(!Aflag) {
+		append(&cc, "-J");		/* old/new decl mixture hack */
+		append(&cc, "-B");		/* turn off non-prototype warnings */
+	}
+	if(objs.n == 0)
+		fatal("no files to compile or load");
+	ccpath = smprint("/bin/%s", ot->cc);
+	append(&cpp, smprint("-I/%s/include/ape", ot->name));
+	append(&cpp, "-I/sys/include/ape");
+	/* remember the common prefixes so per-file arguments can be dropped again */
+	cppn = cpp.n;
+	ccn = cc.n;
+	for(i = 0; i < srcs.n; i++) {
+		append(&cpp, srcs.strings[i]);
+		if(Eflag)
+			doexec("/bin/cpp", &cpp);
+		else {
+			if(Sflag)
+				append(&cc, "-S");
+			else {
+				append(&cc, "-o");
+				if (haveoname && cflag)
+					append(&cc, oname);
+				else
+					append(&cc, changeext(srcs.strings[i], "o"));
+			}
+			dopipe("/bin/cpp", &cpp, ccpath, &cc);
+		}
+		cpp.n = cppn;
+		cc.n = ccn;
+	}
+	if(!cflag) {
+		append(&ld, "-o");
+		append(&ld, oname);
+		for(i = 0; i < ldargs.n; i++)
+			append(&ld, ldargs.strings[i]);
+		for(i = 0; i < objs.n; i++)
+			append(&ld, objs.strings[i]);
+		append(&ld, smprint("/%s/lib/ape/libap.a", ot->name));
+		doexec(smprint("/bin/%s", ot->ld), &ld);
+		/*
+		 * Remove the intermediate object only when it was produced
+		 * here from the single source file.  A lone .o or library
+		 * supplied by the user must survive the link.
+		 */
+		if(srcs.n == 1 && objs.n == 1)
+			remove(objs.strings[0]);
+	}
+
+	exits(0);
+}
+
+/*
+ * Resolve the operand of a -l option to a library path.
+ *
+ * The directories in srchlibs are tried from the most recently added to
+ * the first, looking for <dir>/lib<s>.a; the first one that access()
+ * accepts is returned.  Failing that, the single-letter names c, m, l
+ * and y map to fixed archives under /<objtype>/lib/ape (c and m both
+ * to libap.a) without checking that the file exists.
+ *
+ * Returns a string allocated by smprint, or 0 when s is nil or cannot
+ * be resolved.
+ */
+char *
+searchlib(char *s, char *objtype)
+{
+	char *l;
+	int i;
+
+	if(!s)
+		return 0;
+	for(i = srchlibs.n-1; i>=0; i--) {
+		l = smprint("%s/lib%s.a", srchlibs.strings[i], s);
+		if(access(l, 0) >= 0)
+			return l;
+		free(l);	/* rejected candidate; do not leak it */
+	}
+	/* only one-character names have built-in defaults; the s[0] test keeps s[1] in bounds */
+	if(s[0] == 0 || s[1] != 0)
+		return 0;
+	switch(s[0]) {
+	case 'c':
+	case 'm':
+		return smprint("/%s/lib/ape/libap.a", objtype);
+	case 'l':
+		return smprint("/%s/lib/ape/libl.a", objtype);
+	case 'y':
+		return smprint("/%s/lib/ape/liby.a", objtype);
+	}
+	return 0;
+}
+
+/*
+ * Add s to the end of l, keeping the entry after the last one zero.
+ * Calls fatal when no slot would remain for that terminator.
+ */
+void
+append(List *l, char *s)
+{
+	int slot;
+
+	slot = l->n;
+	if(slot >= Maxlist-1)
+		fatal("too many arguments");
+	l->strings[slot] = s;
+	l->strings[slot+1] = 0;
+	l->n = slot+1;
+}
+
+/*
+ * Run program c with argument vector a in a child process and wait for
+ * it.  With -v the arguments are echoed on fd 2 first.  Any failure
+ * (fork, exec, wait, or a non-empty exit message from the child) is fatal.
+ */
+void
+doexec(char *c, List *a)
+{
+	Waitmsg *status;
+	int pid;
+
+	if(vflag) {
+		printlist(a);
+		fprint(2, "\n");
+	}
+	pid = fork();
+	if(pid == -1)
+		fatal("fork failed");
+	if(pid == 0) {
+		/* child: only returns from exec on failure */
+		exec(c, a->strings);
+		fatal("exec failed");
+	}
+	status = wait();
+	if(status == nil)
+		fatal("wait failed");
+	if(status->msg[0])
+		fatal(smprint("%s: %s", a->strings[0], status->msg));
+	free(status);
+}
+
+/*
+ * Run "c1 | c2": two children are forked around one pipe.  The first
+ * child (pid1) makes the pipe its fd 0 and execs c2 with a2; the second
+ * makes the pipe its fd 1 and execs c1 with a1.  The parent closes both
+ * ends and waits for two children; a non-empty exit message from either
+ * is fatal and is reported under the name of the command that failed.
+ * With -v the pipeline is echoed on fd 2 first.
+ */
+void
+dopipe(char *c1, List *a1, char *c2, List *a2)
+{
+	Waitmsg *w;
+	int pid1, got;
+	int fd[2];
+
+	if(vflag) {
+		printlist(a1);
+		fprint(2, " | ");
+		printlist(a2);
+		fprint(2, "\n");
+	}
+	if(pipe(fd) < 0)
+		fatal("pipe failed");
+	switch((pid1 = fork())) {
+	case -1:
+		fatal("fork failed");
+	case 0:
+		/* consumer side of the pipeline */
+		dup(fd[0], 0);
+		close(fd[0]);
+		close(fd[1]);
+		exec(c2, a2->strings);
+		fatal("exec failed");
+	}
+	switch(fork()) {
+	case -1:
+		fatal("fork failed");
+	case 0:
+		/* producer side of the pipeline */
+		close(0);
+		dup(fd[1], 1);
+		close(fd[0]);
+		close(fd[1]);
+		exec(c1, a1->strings);
+		fatal("exec failed");
+	}
+	close(fd[0]);
+	close(fd[1]);
+	for(got = 0; got < 2; got++) {
+		if((w = wait()) == nil)
+			fatal("wait failed");
+		if(w->msg[0]) {
+			/* pid1 is the child that ran c2/a2; the names used to be swapped */
+			fatal(smprint("%s: %s", (w->pid == pid1) ? a2->strings[0] : a1->strings[0], w->msg));
+		}
+		free(w);
+	}
+}
+
+/*
+ * Return the objtype[] entry whose name equals $objtype.
+ * A missing variable or an unknown name is fatal.
+ */
+Objtype *
+findoty(void)
+{
+	char *name;
+	int i;
+
+	name = getenv("objtype");
+	if(name == 0)
+		fatal("no $objtype in environment");
+	for(i = 0; i < Nobjs; i++) {
+		if(strcmp(name, objtype[i].name) == 0)
+			return &objtype[i];
+	}
+	fatal("unknown $objtype");
+	return 0;			/* not reached; keeps the compiler quiet */
+}
+
+void
+fatal(char *msg)	/* print msg on fd 2 with a "cc: " prefix, then exit with msg as the exit string */
+{
+	fprint(2, "cc: %s\n", msg);
+	exits(msg);
+}
+
+/*
+ * src is expected to end in .something.  Return a newly allocated copy of
+ * its final path component with everything from the last '.' replaced by
+ * ".ext", or 0 when src contains no '.'.  src is cut at the dot while the
+ * result is formatted and restored before returning.
+ */
+char *
+changeext(char *src, char *ext)
+{
+	char *base, *dot, *result;
+
+	base = utfrrune(src, '/');
+	base = base ? base+1 : src;
+
+	dot = utfrrune(src, '.');
+	if(dot == 0)
+		return 0;
+
+	*dot = 0;
+	result = smprint("%s.%s", base, ext);
+	*dot = '.';
+	return result;
+}
+
+/* Print the entries of l on fd 2, separated by single spaces, with no newline. */
+void
+printlist(List *l)
+{
+	int k;
+
+	for(k = 0; k < l->n; k++) {
+		if(k > 0)
+			fprint(2, " ");
+		fprint(2, "%s", l->strings[k]);
+	}
+}

+ 339 - 0
sys/src/ape/cmd/diff/COPYING

@@ -0,0 +1,339 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                          675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	Appendix: How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) 19yy  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) 19yy name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.

+ 1766 - 0
sys/src/ape/cmd/diff/ChangeLog

@@ -0,0 +1,1766 @@
+Sat Oct  1 05:24:19 1994  Paul Eggert  <eggert@twinsun.com>
+
+	* Version 2.7 released.
+
+	* configure.in (AC_HEADER_SYS_WAIT): Add.
+	(AC_CHECK_HEADERS): Remove sys/wait.h.
+	(AC_CHECK_FUNCS): Add tmpnam.
+	* system.h (<sys/wait.h>, WEXITSTATUS): Use simpler scheme
+	now that HAVE_SYS_WAIT_H is not set on hosts
+	that are incompatible with Posix applications.
+
+	* util.c (dir_file_pathname): Use filename_lastdirchar not strrchr.
+	* sdiff.c (expand_name): Likewise.
+	(private_tempnam): Use tmpnam if HAVE_TMPNAM; this simplifies porting.
+	(exists, letters): Omit if HAVE_TMPNAM.
+
+	* diff3.c (read_diff): If STAT_BLOCKSIZE yields zero,
+	adjust it to a more reasonable value.
+
+Sat Sep 24 20:36:40 1994  Paul Eggert  <eggert@twinsun.com>
+
+	* sdiff.c (exists, private_tempnam): Adopt latest GNU libc algorithm.
+	(private_tempnam): Specialize for sdiff to avoid portability problems.
+
+Thu Sep 22 16:47:00 1994  Paul Eggert  <eggert@twinsun.com>
+
+	* configure.in (AC_ARG_PROGRAM): Added.
+	(AC_OUTPUT): Add [date > stamp-h].
+
+	* Makefile.in (DEFAULT_EDITOR_PROGRAM, DIFF_PROGRAM, LIBOBJS,
+	NULL_DEVICE, PR_PROGRAM, PROGRAMS): New variables.
+	(check, stamp-h.in, cmp.o, util.o): New targets.
+	(edit_program_name): New variable; replaces old binprefix method.
+	(install, uninstall): Use it.
+	(binprefix): Removed.
+	(distfiles): Add stamp-h.in.
+	(clean): Clean stamp-h.
+	(config.hin, config.h): Use time stamp files.
+	(cmp_o): Add $(LIBOBJS).
+	(install): Install info files from srcdir if they're not in `.'.
+
+	* cmp.c, io.c (word): Don't define if already defined.
+
+	* cmp.c (main): Use setmode, not open(..., O_BINARY); this gets stdin.
+	Use NULL_DEVICE instead of "/dev/null".
+	(cmp): Use %lu instead of %ld when it is more likely to be right.
+
+	* diff.h (PR_FILE_NAME): Rename to PR_PROGRAM and move to Makefile.in,
+	util.c.
+
+	* diff3.c (main): Give proper diagnostic if too many labels were given.
+	(read_diff): Use SYSTEM_QUOTE_ARG.
+
+	* system.h: <string.h>: Include if HAVE_STRING_H, too.
+	<ctype.h>: Include here.  All includers changed.
+	(CTYPE_DOMAIN, ISDIGIT, ISPRINT, ISSPACE, ISUPPER): New macros that
+	work around common <ctype.h> problems.
+	(O_BINARY): Remove.
+	(SYSTEM_QUOTE_ARG): New macros.
+
+	* diff.c: Add comment.
+
+	* util.c (PR_PROGRAM): Moved here from diff.h.
+	(begin_output): Use SYSTEM_QUOTE_ARG.
+
+	* io.c (read_files): Set mode to binary before returning 1.
+
+	* sdiff.c (TMPDIR_ENV): New macro.
+	(DEFAULT_EDITOR_PROGRAM): Renamed from DEFAULT_EDITOR for consistency.
+	(expand_name): Change `isdir' to `is_dir' to avoid theoretical ctype
+	namespace contamination.
+	(main): Use SYSTEM_QUOTE_ARG.
+	(private_tempnam): Don't access "/tmp" directly; use PVT_tmpdir.
+
+Tue Sep 13 18:46:43 1994  Paul Eggert  <eggert@twinsun.com>
+
+	* configure.in (AC_FUNC_MEMCHR): Remove.  Autoconf didn't adopt this,
+	since we need not worry about an old experimental library
+	where memchr didn't work.
+	(AC_FUNC_MEMCMP): Not needed, since we only test for equality.
+	(AC_REPLACE_FUNCS): Add test for memchr.
+	(AC_CHECK_FUNCS): Check for memchr, not memcpy, since it'll be cached.
+	(AC_CHECK_HEADERS): Add string.h; regex.c uses on some old hosts.
+
+	* system.h (memcmp): Define in terms of bcmp.
+	Use HAVE_MEMCHR to test for all mem* routines.
+
+	* Makefile.in (srcs): Remove memcmp.c.
+	We use bcmp if memcmp doesn't work, since we only test for equality.
+
+Mon Sep 12 15:52:22 1994  Paul Eggert  <eggert@twinsun.com>
+
+	* configure.in (AC_CONFIG_HEADER): Rename config.h.in to config.hin.
+	(AC_ISC_POSIX, AC_MINIX): Go back to these old names for Autoconf 2.
+	(AC_CHECK_HEADERS): Remove now-redundant check for <string.h>.
+	(AC_CHECK_FUNCS): Check for strchr.
+	(AC_FUNC_MEMCHR, AC_FUNC_MEMCMP, AC_CHECK_FUNCS): Use special-purpose
+	macros when suitable.
+	* memcmp.c: New file.
+	* Makefile.in (CPPFLAGS, DEFS, CFLAGS, LDFLAGS, prefix, exec_prefix):
+	Default to autoconf-specified strings.
+	(COMPILE): Use the defaults.
+	(srcs): Add memcmp.c.
+	(distfiles): Rename config.h.in->config.hin, install.sh->install-sh.
+	(Makefile, config.h, config.hin, config.status): Rework for
+	compatibility with Autoconf 2.
+	* io.c (binary_file_p): Assume non-broken memchr.
+	* memchr.c: Assume compiler understands void *; otherwise
+	we don't match GCC's internal declaration of memchr.
+	* system.h: Use more modern autoconf approach to standard C headers.
+	* version.c: Include <config.h>, not "config.h".
+
+	* diff.c, diff.h (ignore_some_line_changes):
+	New variable; replaces `length_varies'.
+	(line_end_char): Replace with '\n'; it wasn't being used consistently.
+
+	* io.c (find_and_hash_each_line): Fix inconsistencies with -b -w -i and
+	incomplete lines.  Put incomplete lines into their own bucket.
+	This means line_cmp no longer needs line length arguments,
+	and equivalence classes' line lengths no longer need to include \n.
+	Invoke line_cmp only if ignore_some_line_changes.
+	(prepare_text_end): -B no longer ignores missing newlines.
+	(read_files): Allocate another bucket for incomplete lines.
+
+	* util.c (line_cmp): Now takes just two arguments.  No longer
+	optimizes for common case of exact equality; the caller does that
+	optimization now.  The caller is changed accordingly.
+	Optimize for the common case of mostly equality.
+	Use isupper+tolower instead of islower+toupper, for consistency.
+
+	* waitpid.c (waitpid): Fix typo with internal scoping.
+
+Thu Sep  8 08:23:15 1994  Paul Eggert  <eggert@twinsun.com>
+
+	* configure.in: Revamp for Autoconf 2.
+	* memchr.c, waitpid.c: New source files for substitute functions.
+	* Makefile.in (diff_o, diff3_o, sdiff_o): Add $(LIBOBJS).
+	(srcs): Add memchr.c, waitpid.c.
+	(distfiles): Add install.sh, memchr.c, waitpid.c.
+	* system.h: Use Autoconf 2 style HAVE_DIRENT_H etc. macros for dirs.
+	* dir.c (dir_sort): Prefer NAMLEN (p) to strlen (p->d_name).
+	Change VOID_CLOSEDIR to CLOSEDIR_VOID for Autoconf 2.
+	* sdiff.c, util.c (memchr, waitpid): Remove; use new substitutes.
+	* diff3.c (read_diff): Use new waitpid substitute.
+
+	* cmp.c, diff.c, diff3.c, sdiff.c (check_stdout, try_help): New fns.
+	(usage): Just print more detailed usage message; let caller exit.
+	* diff.c (option_help): New variable.
+	(filetype): Add Posix.1b file types.
+
+Fri Sep  2 16:01:49 1994  Paul Eggert  <eggert@twinsun.com>
+
+	* configure.in: Switch to new autoconf names.  Add sys/file.h test.
+	* Makefile.in (distclean): Clean config.cache, config.log
+	(used by new autoconf).
+
+	* diff.c, diff3.c (main), sdiff.c (trapsigs): If we'll have children,
+	make sure SIGCHLD isn't ignored.
+
+	* diff3.c (DIFF_CHUNK_SIZE): Removed.  Get size from STAT_BLOCKSIZE.
+	(INT_STRLEN_BOUND): New macro.
+
+	* ifdef.c (format_group, groups_letter_value):
+	Use * instead of [] in prototypes.
+
+	* system.h: Include <sys/file.h> only if HAVE_SYS_FILE_H.
+	(S_IXGRP, S_IXOTH, S_IXUSR): Remove unused macros.
+
+	* util.c (begin_output): Check fdopen result.
+
+	The following changes simplify porting to non-Posix environments.
+	* cmp.c, diff.c, diff3.c, sdiff.c (main): Call initialize_main first.
+	* diff.c (binary_I_O): New variable for --binary option.
+	(main, usage, compare_files): Support --binary option.
+	(compare_files): Use filename_lastdirchar to find last
+	directory char in a file name.
+	* cmp.c (main), diff.c (compare_files), dir.c (compare_names,
+	diff_dirs): Use filename_cmp to compare file names.
+	Use same_file to determine whether two files are the same.
+	* context.c (print_context_label): Check whether ctime yields 0.
+	* diff3.c (read_diff), sdiff.c (cleanup, main, waitpid),
+	util.c (begin_output): Use popen+pclose if !HAVE_FORK.
+	* io.c (sip): If HAVE_SETMODE, test for binary files in O_BINARY mode.
+	* sdiff.c (ck_fdopen): Function removed.
+	(edit): Use system if !HAVE_FORK.
+	(execdiff): Now assumes caller has pushed all args, plus trailing 0.
+	All callers changed.
+	(private_tempnam): Try TMP if TMPDIR isn't defined.
+	Fit temporary filenames into 8.3 limit.
+	* system.h (STAT_BLOCKSIZE): Don't define if already defined.
+	(min, max): Undef if already defined.
+	(filename_cmp, filename_lastdirchar, HAVE_FORK, HAVE_SETMODE,
+	initialize_main, O_BINARY, same_file): New macros.
+
+Fri Jun 17 11:23:53 1994  David J. MacKenzie  (djm@geech.gnu.ai.mit.edu)
+
+	* Makefile.in (info, dvi, diff.dvi): New targets.
+	(clean): Remove TeX output files.
+
+Fri Jun 17 05:37:52 1994  Paul Eggert  (eggert@twinsun.com)
+
+	* cmp.c, io.c (word): Change from typedef to #define, to avoid
+	collision with Unicos 8.0 <sys/types.h>, which also typedefs `word'.
+
+Thu Apr 15 00:53:01 1994  Paul Eggert  (eggert@twinsun.com)
+
+	* diff3.c (scan_diff_line), util.c (print_number_range): Don't
+	rely on promotion to make the old-style parameter type agree
+	with the prototype parameter type; this doesn't work on
+	Apollos running bsd4.3.
+
+Mon Jan  3 02:05:51 1994  Paul Eggert  (eggert@twinsun.com)
+
+	* Makefile.in (LDFLAGS): Remove -g.  Change all link commands
+	to use both $(CFLAGS) and $(LDFLAGS).
+
+Mon Dec 13 12:23:27 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* system.h: Don't assume dirent.h exists just because
+	_POSIX_VERSION is defined.
+
+Fri Dec  3 18:39:39 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* diff.c (main): allow -pu.
+
+Tue Nov 23 03:51:08 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* Makefile.in (distclean): Remove config.h.
+
+Wed Nov 10 00:28:27 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* Version 2.6 released.
+
+	* analyze.c (too_expensive): New variable, for heuristic to
+	limit the worst-case cost to O(N**1.5 log N) at the price of
+	producing suboptimal output for large inputs with many differences.
+	(diff_2_files): Initialize it.
+	(struct partition): New type.
+	(SNAKE_LIMIT): New macro; merely documents already-used number 20.
+	(diag): New `minimal' arg; all callers changed.  Put results into
+	struct partition.  Apply `too_expensive' heuristic.  Tune.
+	(compareseq): New `minimal' arg; all callers changed.  Tune.
+	(shift_boundaries): Improve heuristic to also coalesce adjacent runs
+	of changes more often.
+
+	* diff.c (long_options, main, usage): Add `--help'.
+	(main): Send version number to stdout, not stderr.
+	(usage): Send usage to stdout, not stderr.
+	(compare_files): Initialize `inf' properly.
+
+	* io.c (word): Change to `int'; it makes a big difference on x86.
+	(sip, slurp): Put off allocating room to hold the whole file until we
+	have to read the whole file.  This wins if the file turns out
+	to be binary.
+
+	* util.c (xmalloc, xrealloc): "virtual memory" -> "memory"
+	(primes): Omit large primes if INT_MAX is small.
+
+	* sdiff.c (usage): Send usage to stdout, not stderr.
+	(long_options, main, usage): Add `--help'.
+	(main): Send version number to stdout, not stderr.  Exit afterwards.
+
+	* diff3.c (usage): Send usage to stdout, not stderr.
+	(long_options, main, usage): Add `--help'.
+	(read_diff): Detect integer overflow in buffer size calculations.
+
+	* cmp.c (word): New type.  All uses of `long' for
+	word-at-a-time comparisons changed to `word'.
+	(long_options, main, usage): Add `--help'.
+	(usage): Send usage to stdout, not stderr.
+	(main): Add `-v'.  Send version number to stdout, not stderr.
+
+	* configure.in (AC_HAVE_HEADERS): Add unistd.h; remove AC_UNISTD_H.
+
+Mon Sep 27 07:20:24 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* diff.c (add_exclude_file): Cast memchr to (char *)
+	to suppress bogus warnings on some nonstandard hosts.
+
+	* Makefile.in (cmp): Add version.o.
+
+	* analyze.c (diff_2_files): Work around memcmp bug with size=0.
+
+	* cmp.c (main, usage, version_string): Add --version option.
+
+	* system.h (malloc, realloc): Declare only if !HAVE_STDLIB_H.
+	(memchr): Declare only if !HAVE_MEMCHR.  These changes are
+	needed to keep some nonstandard hosts happy.
+
+	* util.c (memchr): Make first arg char const *
+	to match standard.
+	(xmalloc, xrealloc): Cast malloc, realloc
+	to (VOID *) to suppress bogus warnings on some nonstandard hosts.
+
+	* diff3.c (xmalloc, xrealloc): Cast malloc, realloc
+	to (VOID *) to suppress bogus warnings on some nonstandard hosts.
+
+	* sdiff.c (xmalloc, xrealloc): Cast malloc, realloc
+	to (VOID *) to suppress bogus warnings on some nonstandard hosts.
+	(lf_copy, lf_skip, lf_snarf): Cast memchr to (char *)
+	to suppress bogus warnings on some nonstandard hosts.
+	(memchr): Make first arg char const *
+	to match standard.
+
+Mon Sep 27 00:23:37 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* Version 2.5 released.
+
+	* analyze.c (diff_2_files): Work around memcmp bug with size=0.
+
+	* cmp.c (main, usage, version_string): Add --version option.
+	* Makefile.in (cmp): Add version.o.
+
+	* diff.c (add_exclude_file): Cast memchr to (char *)
+	to suppress bogus warnings on some nonstandard hosts.
+	* sdiff.c (lf_copy, lf_skip, lf_snarf): Likewise.
+
+	* diff3.c, sdiff.c, util.c (xmalloc, xrealloc): Cast malloc, realloc
+	to (VOID *) to suppress bogus warnings on some nonstandard hosts.
+
+	* sdiff.c, util.c (memchr): Make first arg char const *
+	to match standard.
+
+	* system.h (malloc, realloc): Declare only if !HAVE_STDLIB_H.
+	(memchr): Declare only if !HAVE_MEMCHR.  These changes are
+	needed to keep some nonstandard hosts happy.
+
+	* xmalloc.c: Include <sys/types.h> always; some nonstandard hosts
+	need it for size_t even if STDC_HEADERS.
+
+Sat Sep 18 01:33:07 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* configure.in (AC_STAT_MACROS_BROKEN): Add.
+	* system.h (S_IS{BLK,CHR,DIR,FIFO,REG,SOCK}): Fix defns if
+	STAT_MACROS_BROKEN.
+
+	* Makefile.in (diff3, sdiff, cmp): Do not link $(ALLOCA).
+
+	* analyze.c (discard_confusing_lines): Make defn static, like decl.
+	* sdiff.c (xmalloc): Likewise.
+
+	* ifdef.c (format_group): Ensure isdigit argument isn't < 0.
+
+	* side.c (print_half_line): Use isprint, since some hosts lack isgraph.
+	* util.c (output_1_line): Likewise.  Ensure its argument isn't < 0.
+	(xmalloc, xrealloc): Remove needless casts.
+
+	* system.h (volatile, const):
+	Define these before including any system headers,
+	so that they're used consistently in all system includes.
+	(getenv, malloc, realloc): Declare even if HAVE_STDLIB_H, since some
+	<stdlib.h>s don't declare them.
+	(memchr): Likewise for <string.h>.
+
+	* cmp.c, diff3.c, diff.h, sdiff.c: Include "system.h" first.
+	* diff.c: Remove redundant "system.h" inclusion.
+
+	* diff3.c (xmalloc): Now static.
+	(xmalloc, xrealloc): Remove needless casts.
+	(READNUM): Ensure isdigit argument isn't negative.
+
+Wed Sep 14 07:14:15 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* Version 2.4 released.
+
+	* ifdef.c (scan_char_literal): New function, for new %c'x' and
+	%c'\ooo' format specs.
+	(format_group, print_ifdef_lines): Use it.  Remove %0 format spec.
+
+	* cmp.c (cmp): Don't try to read past end of file; this doesn't
+	work on ttys.
+
+	* system.h, version.c: #include <config.h>, not "config.h", to allow
+	configuring in a separate directory when the source directory has
+	already been configured.
+	* Makefile.in (COMPILE): New defn, with proper -I options so that
+	`#include <config.h>' works.
+	(.c.o, diff3.o, sdiff.o): Use it.
+
+Mon Sep 13 06:45:43 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* diff.c (main, longopts): Add --line-format=FORMAT option.
+	(specify_format): Args no longer const pointers.  All callers changed.
+
+	* ifdef.c: Add support for %?c, %(A=B?T:E), PRINTF_SPECn formats.
+	(struct group): New struct.
+	(print_ifdef_lines): Use it to simplify argument passing.
+	Remove the convention that last arg -1 signifies that the lines
+	from file 2 are the same as the lines from file 1; this
+	convention no longer works, now that line numbers might be
+	printed out, since the line numbers may differ.
+	Add first FILE * argument to output to.  All callers changed.
+	Use a faster test for the single-fwrite optimization.
+	(format_group, scan_printf_spec, groups_letter_value): New functions.
+
+	* diff.h (group_format, line_format): No longer const pointers.
+	(format_ifdef): 1st arg is no longer const pointer.
+
+	* configure.in: Configure HAVE_LIMITS_H, HAVE_STDLIB_H.
+	* system.h <limits.h>, <stdlib.h>, <string.h>:
+	Include only if HAVE_LIMITS_H etc.
+
+	* system.h (memcmp, memcpy, strchr, strrchr, struct dirent): Prefer
+	these standard names to the traditional names (bcmp, bcpy, index,
+	rindex, struct direct).  All callers changed.
+
+	* system.h (PARAMS, VOID):
+	Define earlier so that malloc decl can use VOID.
+	(STAT_BLOCKSIZE): Simplify ersatz defn; just use 8K.
+
+Fri Sep  3 00:21:02 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* diff.c (compare_files): Two files with the same name must be
+	the same file; avoid a needless `stat' in that case.
+
+Fri Aug 27 06:59:03 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* Pervasive changes for portability to 64-bit hosts:
+	Add prototypes to function declarations.
+	Use size_t, not int, when needed.
+
+	* Other pervasive changes:
+	Use `const' more often.
+	Use STD{IN,OUT,ERR}_FILENO instead of [012].
+	Use 0, not NULL, for portability to broken hosts.
+
+	* Makefile.in: (srcs, objs, distfiles, cmp): New files cmpbuf.[ch].
+	(distfiles): New files config.h.in, mkinstalldirs.
+	(.c.o): Add -DHAVE_CONFIG_H.
+
+	* analyze.c: (diag): Pacify `gcc -Wall' with a useless assignment.
+	(diff_2_files): Use l.c.m., not max, of files' buffer sizes.
+
+	* cmp.c: Make globals static when possible.
+
+	(file): Now a 2-element array; replaces `file1' and `file2'.
+	(file_desc, buffer): Likewise, for file[12]_desc and buf[12].
+	(main): Likewise, for stat_buf[12].  Index these variables with `i'.
+
+	(ignore_initial): New var.
+	(long_options): Now const.  Add `--ignore-initial'.
+	(usage): Sort options and add `--ignore-initial'.
+	(main, cmp): Add `--ignore-initial' support.
+
+	(main): `cmp - -' now succeeds.
+	When comparing standard input to a file, and using a shortcut (e.g.
+	looking at file sizes or inode numbers), take the lseek offset into
+	account before deciding whether the files are identical.
+	Avoid mentioning `dev_t', `ino_t' for portability to nonstandard hosts.
+	Use l.c.m. of files' buffer sizes, not 8 * 1024.
+	ferror (stdout) does not imply errno has a useful value.
+	If 2nd file is "-", treat it first, in case stdin is closed.
+
+	(cmp): Always compute `char_number', `smaller' for speed and simplicity.
+	Say `cmp: EOF on input', not `/usr/gnu/bin/cmp: EOF on input',
+	as per Posix.2.
+
+	(block_compare_and_count): Increment line_number argument.
+	Remove end_char argument; it's always '\n'.  All callers changed.
+	Do not assume sizeof(long) == 4; this isn't true on some 64-bit hosts.
+	(block_compare): Minimize differences with block_compare_and_count.
+
+	(block_read): Coalesce `bp += nread's.
+
+	(printc): Remove `FILE *' arg; output to stdout.  All callers changed.
+
+	* configure.in: Configure HAVE_SIGACTION, RETSIGTYPE, HAVE_VPRINTF.
+	Configure into config.h.
+
+	* context.c (print_context_label):
+	Standard input's st_mtime is no longer a special case
+	here, since `compare_files' now sets it to the current time.
+
+	* diff.c (usage): Sort options.
+	(filetype): New function.
+	(compare_files): Set stdin's st_mtime to be the current time.
+	Leave its name "-" instead of changing it to "Standard Input";
+	to test whether a file is stdin, we must compare its name to "-" instead
+	of its desc to 0, since if it's closed other file descs may be 0.
+	When comparing standard input to a file, and using a shortcut (e.g.
+	looking at file sizes or inode numbers), take the lseek offset into
+	account before deciding whether the files are identical.
+	Pretend that nonexistent files have the same filetype as existing files.
+	Rename `errorcount' to `failed', since it's boolean.
+	In directory comparisons, if a file is neither a regular file nor a
+	directory, just print its type and the other file's type.
+
+	* diff.h (Is_space, textchar): Remove.
+	(struct msg, msg_chain, msg_chain_end): Move to util.c.
+	(VOID): Move to system.h.
+	(line_cmp, version_string, change_letter, print_number_range,
+	find_change): New decls.
+
+	* diff.texi:
+	whitespace -> white space.  It now stands for whatever isspace yields.
+	Add --ignore-initial.
+
+	* diff3.c (VOID): Move to system.h.
+	(version_string): Now char[].
+	(usage): Sort options.
+	(process_diff): Pacify `gcc -Wall' with a useless assignment.
+	(read_diff): pid is of type pid_t, not int.  Use waitpid if available.
+	(output_diff3): Simplify test for `\ No newline at end of file' message.
+
+	* dir.c (struct dirdata): Rename `files' to `names' to avoid confusion
+	with external struct file_data `files'.
+
+	* io.c (line_cmp): Move declaration to diff.h.
+	(textchar): Remove.
+	(find_and_hash_each_line): Use locale's definition of white space
+	instead of using one hardwired defn for -b and another for -w.
+
+	* normal.c (change_letter, print_number_range, find_change):
+	Move decls to diff.h.
+	(print_normal_hunk): Now static.
+
+	* sdiff.c (SEEK_SET): Move to system.h.
+	(version_string): Now char[], not char*.
+	(private_tempnam): Remove hardcoded limit on temporary file names.
+	(exiterr, perror_fatal, main): When exiting because of a signal,
+	exit with that signal's status.
+	(lf_refill, main, skip_white, edit, interact): Check for signal.
+	(ignore_SIGINT): Renamed from `ignore_signals'.
+	(NUM_SIGS, initial_handler): New macros.
+	(initial_action, signal_received, sigs_trapped): New vars.
+	(catchsig, trapsigs): Use sigaction if possible, since this closes the
+	windows of vulnerability that `signal' has.  Use RETSIGTYPE not void.
+	When a signal comes in, just set a global variable; this is safer.
+	(checksigs, untrapsig): New functions.
+	(edit): Pacify `gcc -Wall' with a useless assignment.
+	Respond to each empty line with help, not to every other empty line.
+	(private_tempnam): Remove hardcoded limit on temporary file name length.
+	Don't assume sizeof (pid_t) <= sizeof (int).
+
+	* system.h: (S_IXOTH, S_IXGRP, S_IXUSR,
+	SEEK_SET, SEEK_CUR,
+	STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO):
+	New macros, if system doesn't define them.
+	(volatile): Don't define if already defined.
+	(PARAMS): New macro.
+	(VOID): Move here from diff.h.
+
+	* util.c (struct msg, msg_chain, msg_chain_end): Moved here from diff.h.
+	(message5): New function.
+	(pr_pid): New var.
+	(begin_output): Allocate `name' more precisely.
+	Put child pid into pr_pid, so that we can wait for it later.
+	Don't check execl's return value, since any return must be an error.
+	(finish_output): Detect and report output errors.
+	Use waitpid if available.  Check pr exit status.
+	(line_cmp): Use locale's definition of white space
+	instead of using one hardwired defn for -b and another for -w.
+	(analyze_cmp): Avoid double negation with `! nontrivial'.
+	Pacify `gcc -Wall' by rewriting for-loop into do-while-loop.
+	(dir_file_pathname): New function.
+
+	* version.c (version_string): Now char[], not char*.
+
+Thu Jul 29 20:44:30 1993  David J. MacKenzie  (djm@wookumz.gnu.ai.mit.edu)
+
+	* Makefile.in (config.status): Run config.status --recheck, not
+	configure, to get the right args passed.
+
+Thu Jul 22 10:46:30 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* Makefile.in (dist): Replace `if [ ! TEST ]; then ACTION; fi'
+	with `[ TEST ] || ACTION || exit' so that the containing for-loop exits
+	with proper status for `make'.
+
+Thu Jul  8 19:47:22 1993  David J. MacKenzie  (djm@goldman.gnu.ai.mit.edu)
+
+	* Makefile.in (installdirs): New target.
+	(install): Use it.
+	(Makefile, config.status, configure): New targets.
+
+Sat Jun  5 23:10:40 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* Makefile.in (dist): Switch from .z to .gz.
+
+Wed May 26 17:16:02 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* diff.c (main): Cast args to compare_files, for traditional C.
+	* side.c (print_sdiff_common_lines, print_sdiff_hunk): Likewise.
+	* analyze.c, diff3.c, sdiff.c, util.c: Don't assume NULL is defined
+	properly.
+
+Tue May 25 14:54:05 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* analyze.c (diff_2_files):  With -q, do not report that files differ
+	if all their differences are ignored.
+	(briefly_report): New function.
+	* diff.h (ignore_some_changes): New variable.
+	* diff.c (compare_files): Don't use the file size shortcut if
+	ignore_some_changes is nonzero, since the file size may differ
+	merely due to ignored changes.
+	(main):  Set ignore_some_changes if we might ignore some changes.
+	Remove unsystematic assignment of 0 to static vars.
+	* io.c (read_files): New argument PRETEND_BINARY says whether to
+	pretend the files are binary.
+
+	* diff3.c (tab_align_flag): New variable, for new -T option.
+	(main, usage, output_diff3): Add support for -T.
+
+Sun May 23 15:25:29 1993  Richard Stallman  (rms@mole.gnu.ai.mit.edu)
+
+	* dir.c (dir_sort): Always init `data' to avoid GCC warning.
+
+Sat May 22 15:35:02 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* Makefile.in (dist): Change name of package from diff to diffutils.
+	Don't bother to build .Z dist; .z suffices.
+
+Fri May 21 16:35:22 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* diff.c: Include "system.h" to get memchr declaration.
+	* system.h (memchr): Declare if !HAVE_MEMCHR, not if
+	!HAVE_MEMCHR && !STDC_HEADERS.
+
+Wed May 19 17:43:55 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* Version 2.3 released.
+
+Fri Apr 23 17:18:44 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* io.c (find_identical_ends): Do not discard the last HORIZON_LINES
+	lines of the prefix, or the first HORIZON_LINES lines of the suffix.
+	* diff.c (main, longopts, usage): Add --horizon-lines option.
+	* diff3.c (main, process_diff, read_diff): Invoke second diff
+	with --horizon-lines determined by the first diff.
+	* diff.h, diff3.c (horizon_lines): New variable.
+
+Mon Mar 22 16:16:00 1993  Roland McGrath  (roland@churchy.gnu.ai.mit.edu)
+
+	* system.h [HAVE_STRING_H || STDC_HEADERS] (bcopy, bcmp, bzero):
+	Don't define if already defined.
+
+Fri Mar  5 00:20:16 1993  Richard Stallman  (rms@mole.gnu.ai.mit.edu)
+
+	* diff.c (main): Use NULL in arg to compare_files.
+
+Thu Feb 25 15:26:01 1993  Roland McGrath  (roland@churchy.gnu.ai.mit.edu)
+
+	* system.h: Declare memchr #if !HAVE_MEMCHR && !STDC_HEADERS,
+	not #if !HAVE_MEMCHR || !STDC_HEADERS.
+
+Mon Feb 22 15:04:46 1993  Richard Stallman  (rms@geech.gnu.ai.mit.edu)
+
+	* io.c (find_identical_ends): Move complicated arg outside GUESS_LINES.
+
+Mon Feb 22 12:56:12 1993  Roland McGrath  (roland@churchy.gnu.ai.mit.edu)
+
+	* Makefile.in (.c.o): Add -I$(srcdir); put $(CFLAGS) last before $<.
+
+Sat Feb 20 19:18:56 1993  Richard Stallman  (rms@mole.gnu.ai.mit.edu)
+
+	* io.c (binary_file_p): Return zero if file size is zero.
+
+Fri Feb 19 17:31:32 1993  Roland McGrath  (roland@geech.gnu.ai.mit.edu)
+
+	* Version 2.2 released.
+
+	* system.h [HAVE_STRING_H || STDC_HEADERS] (index, rindex): Don't
+	define if already defined.
+
+Wed Feb 17 17:08:00 1993  Roland McGrath  (roland@churchy.gnu.ai.mit.edu)
+
+	* Makefile.in (srcs): Remove limits.h.
+
+Thu Feb 11 03:36:00 1993  Richard Stallman  (rms@mole.gnu.ai.mit.edu)
+
+	* diff3.c (xmalloc): No longer static.
+
+	* sdiff.c (edit): Allocate buf dynamically.
+
+	* dir.c (dir_sort): Handle VOID_CLOSEDIR.
+
+Wed Feb 10 00:15:54 1993  Richard Stallman  (rms@mole.gnu.ai.mit.edu)
+
+	* limits.h: File deleted (should never have been there).
+
+Tue Feb  9 03:53:22 1993  Richard Stallman  (rms@mole.gnu.ai.mit.edu)
+
+	* Makefile.in (.c.o, diff3.o, sdiff.o): Put $(CFLAGS) last.
+
+Wed Feb  3 15:42:10 1993  David J. MacKenzie  (djm@goldman.gnu.ai.mit.edu)
+
+	* system.h: Don't #define const; let configure do it.
+
+Mon Feb  1 02:13:23 1993  Paul Eggert  (eggert@hal.gnu.ai.mit.edu)
+
+	* Version 2.1 released.
+
+	* Makefile.in (dist): Survive ln failures.  Create .tar.z
+	(gzipped tar) file as well as .tar.Z (compressed tar) file.
+
+Fri Jan  8 22:31:41 1993  Paul Eggert  (eggert@twinsun.com)
+
+	* side.c (print_half_line): When the input position falls
+	outside the column, do not output a tab even if the output
+	position still falls within the column.
+
+Mon Dec 21 13:54:36 1992  David J. MacKenzie  (djm@kropotkin.gnu.ai.mit.edu)
+
+	* Makefile.in (.c.o): Add -I.
+
+Fri Dec 18 14:08:20 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* configure.in: Add HAVE_FCNTL_H, since system.h uses it.
+
+Tue Nov 24 10:06:48 1992  David J. MacKenzie  (djm@goldman.gnu.ai.mit.edu)
+
+	* Makefile.in: Note change from USG to HAVE_STRING_H.
+
+Mon Nov 23 18:44:00 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* io.c (find_and_hash_each_line): When running out of lines,
+	double the number of allocated lines, instead of just doubling
+	that number minus the prefix lines.  This is more likely to
+	avoid the need for further memory allocation.
+
+Wed Nov 18 20:40:28 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* dir.c (dir_sort): Plug memory leak: space holding directory
+	contents was not being reclaimed.  Get directory size from
+	struct file_data for initial guess at memory needed.
+	Detect errors when reading and closing directory.
+	(diff_dirs): Pass struct file_data to dir_sort.  Finish plugging leak.
+	* diff.c (compare_files): Pass struct file_data to diff_dirs.
+
+	* io.c (find_and_hash_each_line): Don't assume alloc_lines is
+	nonzero when allocating more lines.
+
+Thu Nov 12 16:02:18 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* diff.c (main): Add `-U lines' as an alias for `--unified=lines'.
+
+	* diff3.c (usage): Add third --label option in example.
+
+	* util.c (analyze_hunk): Fix test for ignoring blank lines.
+
+	* configure.in, system.h: Avoid USG; use HAVE_TIME_H etc. instead.
+
+Mon Nov  9 05:13:25 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* diff3.c (main, usage): Add -A or --show-all.
+	-m now defaults to -A, not -E.  Allow up to three -L options.
+	(output_diff3_edscript, output_diff3_merge):
+	Remove spurious differences between these two functions.
+	Output ||||||| for -A.  Distinguish between conflicts and overlaps.
+	(dotlines, undotlines): New functions that output `Ns', not `N,Ns'.
+	(output_diff3_edscript, output_diff3_merge): Use them.
+
+	* io.c (find_identical_ends): shift_boundaries needs an extra
+	identical line at the end, not at the beginning.
+
+	* sdiff.c (edit): execvp wants char **, not const char **.
+
+Mon Oct 19 04:39:32 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* context.c (print_context_script, find_function): Context
+	line numbers start with - file->prefix_lines, not 0.
+
+	* io.c (binary_file_p): Undo last change; it was a library bug.
+
+Sun Oct 18 00:17:29 1992  Richard Stallman  (rms@mole.gnu.ai.mit.edu)
+
+	* io.c (binary_file_p): Consider empty file as non-binary.
+
+Mon Oct  5 05:18:46 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* diff3.c (main, make_3way_diff, using_to_diff3_block): Don't
+	report bogus differences (for one of -mexEX3) just because the
+	file0-file1 diffs don't line up with the file0-file2 diffs.
+	(This is entirely possible since we don't use diff's -n
+	option.)  Always compare file1 to file2, so that diff3 sees
+	those changes directly.  Typically, file2 is now the common
+	file, not file0.
+	(output_diff3_merge): The input file is file 0, not the common file.
+
+	(FC, FO): New macros; they replace FILE1, FILE0 for two-way diffs,
+	to distinguish them from three-way diffs.
+
+	* diff3.c (using_to_diff3_block): Fold repeated code into loops.
+
+	* diff3.c (make_3way_diff, process_diff): Have the *_end
+	variable point to the next field to be changed, not to the last
+	object allocated; this saves an if-then-else.
+
+	* diff3.c (process_diff): Use D_NUMLINES instead of its definiens.
+
+	* diff3.c: Make fns and vars static unless they must be external.
+
+Wed Sep 30 09:21:59 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* analyze.c (diff_2_files): OUTPUT_IFDEF is now robust.
+	* diff.h (ROBUST_OUTPUT_STYLE): Likewise.
+	(default_line_format): Remove.  All refs removed.
+
+	* ifdef.c (print_ifdef_lines): Add %L.  Optimize %l\n even if user
+	specified it, as opposed to its being the default.
+
+Tue Sep 29 19:01:28 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* diff.c (longopts, main): --{old,new,unchanged,changed}-group-format
+	are new options, so that -D is no longer overloaded.  Set
+	no_diff_means_no_output if --unchanged-{line,group}-format allows it.
+	* diff.h (enum line_class): New type.
+	(group_format, line_format): Use it to regularize option flags.
+	All refs changed.
+
+	* ifdef.c (format_ifdef, print_ifdef_lines): %n is no longer a format.
+
+Mon Sep 28 04:51:42 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* diff.c (main, usage): Replace --line-prefix with the more general
+	--{old,new,unchanged}-line-format options.
+	* ifdef.c (format_ifdef, print_ifdef_lines): Likewise.
+	* diff.h (line_format): Renamed from line_prefix.  All refs changed.
+	* diff.h, ifdef.c (default_line_format): New variable.
+	* util.c (output_1_line): New function.
+	(print_1_line): Use it.
+
+	* ifdef.c (format_ifdef, print_ifdef_lines): Add %0 format.
+
+Sun Sep 27 05:38:13 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* diff.c (main): Add -E or --line-prefix option.  Add -D'=xxx'
+	for common lines.  Change default -D< format from copy of -D>
+	format to -D<%<; similarly for default -D> format.
+	* diff.h (common_format, line_prefix): New variables.
+	* ifdef.c (format_ifdef): New function.
+	(print_ifdef_script, print_ifdef_hunk, print_ifdef_lines):
+	Use it for -D'=xxx', -E.
+
+	* context.c (find_hunk): Glue together two non-ignorable changes that
+	are exactly CONTEXT * 2 lines apart.  This shortens output, removes
+	a behavioral discontinuity at CONTEXT = 0, and is more compatible
+	with traditional diff.
+
+	* io.c (find_identical_ends): Slurp stdin at most once.
+
+	* util.c (print_line_line): line_flag is const char *.
+
+Thu Sep 24 15:18:07 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* ifdef.c (print_ifdef_lines): New function, which fwrites a sequence
+	of lines all at once for speed.
+	(print_ifdef_script, print_ifdef_hunk): Use it.
+
+Thu Sep 24 05:54:14 1992  Paul Eggert  (eggert@twinsun.com)
+
+	* diff.c (main): Support new -D options for if-then-else formats.
+	(specify_format): New function.
+	* diff.h (ifndef_format, ifdef_format, ifnelse_format): New variables.
+	* ifdef.c (print_ifdef_hunk): Use the new variables instead of
+	a hardwired format.
+
+	* side.c (print_1sdiff_line): Represent incomplete lines on output.
+	(print_sdiff_script): Likewise.  Don't print 'q' at end,
+	since that doesn't work with incomplete lines.
+	* sdiff.c (interact): Don't assume diff output ends with 'q' line.
+	* diff.h (ROBUST_OUTPUT_STYLE): OUTPUT_SDIFF is now robust.
+
+	* sdiff.c (lf_copy, lf_snarf): Use memchr instead of index,
+	to avoid dumping core when files contain null characters.
+	(memchr): New function (if memchr is missing).
+
+	* io.c (sip): New arg SKIP_TEST to skip test for binary file.
+	(read_files): Don't bother testing second file if first is binary.
+
+Thu Sep 17 21:17:49 1992  David J. MacKenzie  (djm@nutrimat.gnu.ai.mit.edu)
+
+	* system.h [!USG && !_POSIX_VERSION]: Protect from conflicting
+	prototype for wait in sys/wait.h.
+
+Wed Sep 16 12:32:18 1992  David J. MacKenzie  (djm@nutrimat.gnu.ai.mit.edu)
+
+	* Makefile.in: Include binprefix in -DDIFF_PROGRAM.
+
+Tue Sep 15 14:27:25 1992  David J. MacKenzie  (djm@nutrimat.gnu.ai.mit.edu)
+
+	* Version 2.0.
+
+Sat Sep 12 01:31:19 1992  David J. MacKenzie  (djm@nutrimat.gnu.ai.mit.edu)
+
+	* util.c, diff.h, system.h [!HAVE_MEMCHR]: Don't use void *
+	and const when declaring memchr replacement.  Declare memchr
+	if !STDC_HEADERS && !USG.
+
+Thu Sep 10 15:17:32 1992  David J. MacKenzie  (djm@nutrimat.gnu.ai.mit.edu)
+
+	* Makefile.in (uninstall): New target.
+
+	* diff.c (excluded_filename): Use fnmatch, not wildmat.
+	(usage): Document -x, -X, --exclude, --exclude-from.
+	Makefile.in: Use fnmatch.c, not wildmat.c.
+
+Sun Sep  6 23:46:25 1992  Paul Eggert (eggert@twinsun.com)
+
+	* configure.in: Add HAVE_MEMCHR.
+	* diff.h, util.c: Use it instead of MEMCHR_MISSING.
+
+Sun Sep  6 07:25:49 1992  Paul Eggert (eggert@twinsun.com)
+
+	* diff.h (struct line_def): Replace this 3-word struct with char *.
+	This uses less memory, particularly for large files with short lines.
+	(struct file_data): New member linbuf_base counts number of lines
+	in common prefix that are not recorded in linbuf;
+	this uses less memory if files are identical or differ only at end.
+	New member buffered_lines counts possibly differing lines.
+	New member valid_lines counts valid data.
+	New member alloc_lines - linbuf_base replaces old linbufsize.
+	linbuf[0] now always points at first differing line.
+	Remove unused members ltran, suffix_lines.
+	Add const where appropriate.
+	(Is_space): New macro, for consistent definition of `white space'.
+	(excluded_filename, memchr, sip, slurp): New declarations.
+	* ed.c (print_ed_hunk): Adjust to diff.h's struct changes.
+	* context.c (pr_context_hunk): Likewise.
+	* ifdef.c (print_ifdef_script): Likewise.
+	* side.c (print_sdiff_script, print_half_line): Likewise.
+	* util.c (analyze_hunk, line_cmp, print_1_line): Likewise.
+
+	* analyze.c (shift_boundaries): Remove unneeded variable `end' and
+	unnecessary comparisons of `preceding' and `other_preceding' against 0.
+	(diff_2_files): When comparing files byte-by-byte for equality,
+	don't slurp them all in at once; just compare them a buffer at a time.
+	This can win big if they differ early on.
+	Move some code to compare_files to enable this change.
+	Use only one buffer for stdin with `diff - -'.
+	(discard_confusing_lines, diff_2_files): Coalesce malloc/free calls.
+	(build_script): Remove obsolete OUTPUT_RCS code.
+
+	* diff.c (add_exclude, add_exclude_file, excluded_filename): New fns.
+	(main): Use them for the new --exclude and --exclude-from options.
+	(compare_files): Don't open a file unless it must be read.
+	Treat `diff file file' and `diff file dir' similarly.
+	Move some code here from diff_2_files to enable this.
+	Simplify file vs dir warning.
+
+	* dir.c (dir_sort): Support new --exclude* options.
+
+	* io.c (struct equivclass): Put hash code and line length here instead
+	of struct line_def, so that they can be shared.
+	(find_and_hash_each_line): Compute equivalence class as we go,
+	instead of doing it in a separate pass; this thrashes memory less.
+	Make buckets realloc-able, since we can't preallocate them.
+	Record one more line start than there are lines, so that we can compute
+	any line's length by subtracting its start from the next line's,
+	instead of storing the length explicitly.  This saves memory.
+	Move prefix-handling code to find_identical_ends;
+	this wins with large prefixes.
+	Use Is_space, not is_space, for consistent treatment of white space.
+	(prepare_text_end): New function.
+	(find_identical_ends): Move slurping here, so it's only done when
+	needed.  Work even if the buffers are the same (because of `diff - -').
+	Compare prefixes a word at a time for speed.
+	(find_equiv_class): Delete; now done by find_and_hash_each_line.
+	(read_files): Don't slurp unless needed.
+	find_equiv_class's work is now folded into find_and_hash_each_line.
+	Don't copy stdin buffer if `diff - -'.
+	Check for running out of primes.
+	(sip, slurp): Split first part of `slurp' into another function `sip'.
+	`sip' sets things up and perhaps reads the first ST_BLKSIZE buffer to
+	see whether the file is binary; `slurp' now just finishes the job.
+	This lets diff_2_files compare binary files lazily.
+	Allocate a one-word sentinel to allow word-at-a-time prefix comparison.
+	Count prefix lines only if needed, only count the first file's prefix.
+	Don't bother to count suffix lines; it's never needed.
+	Set up linbuf[0] to point at first differing line.
+	(binary_file_p): Change test for binary files:
+	if it has a null byte in its first buffer, it's binary.
+	(primes): Add more primes.
+
+	* util.c (line_cmp): Use bcmp for speed.
+	Use Is_space, not is_space, for consistent treatment of white space.
+	(translate_line_number): Internal line numbers now count from 0
+	starting after the prefix.
+	(memchr): New function (if memchr is missing).
+
+	* Makefile.in: Document HAVE_ST_BLKSIZE.  Link with wildmat.o.
+	* system.h (STAT_BLOCKSIZE): New macro based on HAVE_ST_BLKSIZE.
+	* configure.in: Add AC_ST_BLKSIZE.
+	* wildmat.c: New file.
+
+Fri Sep  4 01:28:51 1992  Richard Stallman  (rms@mole.gnu.ai.mit.edu)
+
+	* sdiff.c (xmalloc): Renamed from ck_malloc.  Callers changed.
+
+Thu Sep  3 15:28:59 1992  David J. MacKenzie  (djm@nutrimat.gnu.ai.mit.edu)
+
+	* diff.h: Don't declare free, index, rindex.
+
+Tue Aug 11 22:18:06 1992  John Gilmore  (gnu at cygnus.com)
+
+	* io.c (binary_file_p):  Use heuristic to avoid declaring info
+	files as binary files.  Allow about 1.5% non-printing
+	characters (in info's case, ^_).
+
+Tue Jul  7 01:09:26 1992  David J. MacKenzie  (djm@nutrimat.gnu.ai.mit.edu)
+
+	* diff.h: Replace function_regexp and ignore_regexp with lists
+	of compiled regexps.
+	* analyze.c, context.c, util.c: Test whether the lists, not
+	the old variables, are empty.
+	* util.c (analyze_hunk), context.c (find_function): Compare
+	lines with the lists of regexps.
+	* diff.c (add_regexp): New function.
+	(main): Use it.
+
+	* diff3: Add -v --version option.
+	* Makefile.in: Link with version.o.
+
+	* system.h: New file.
+	* diff.h, cmp.c, diff3.c, sdiff.c: Use it.
+
+	* diff.h, diff3.c: Include string.h or strings.h, as appropriate.
+	Declare malloc and realloc.
+
+	* diff3.c (perror_with_exit): Include program name in message.
+
+	* diff3.c: Lowercase error messages for GNU standards.
+
+	* sdiff.c [USG || STDC_HEADERS]: Define bcopy in terms of memcpy.
+
+	* sdiff.c: Use the version number from version.c.
+	* Makefile.in: Link with version.o.
+
+	* cmp.c error.c xmalloc.c: New files from textutils.
+	* Makefile.in: Add rules for them.
+
+	* diff.c (longopts): --unidirectional-new-file is like -P, not -N.
+	Rename --file-label to --label (leave old name, but undocumented).
+
+	* sdiff.c, diff.c (usage): Condense messages and fix some errors.
+
+	* diff3.c (main, usage): Add long-named options.
+
+Fri Jul  3 14:31:18 1992  David J. MacKenzie  (djm@nutrimat.gnu.ai.mit.edu)
+
+	* diff.h, diff3.c, sdiff.c: Change FOO_MISSING macros to HAVE_FOO.
+
+Thu Jun 25 16:59:47 1992  David J. MacKenzie  (djm@apple-gunkies.gnu.ai.mit.edu)
+
+	* diff.c: --reversed-ed -> --forward-ed.
+
+Wed Feb 26 12:17:32 1992  Paul Eggert  (eggert@yata.uucp)
+
+	* analyze.c, diff.c, diff.h, io.c: For -y, compare even if same file.
+
+Fri Feb 14 22:46:38 1992  Richard Stallman  (rms@mole.gnu.ai.mit.edu)
+
+	* io.c, diff3.c, analyze.c: Add extra parentheses.
+
+Sun Feb  9 00:22:42 1992  Richard Stallman  (rms@mole.gnu.ai.mit.edu)
+
+	* diff.h (unidirectional_new_file_flag): New variable.
+	* diff.c (main): Set that for -P.
+	(compare_files): Support -P, somewhat like -N.
+	(longopts): Support long name for -P.
+
+Sat Jan  4 20:10:34 1992  Paul Eggert (eggert at yata.uucp)
+
+	* Makefile.in: Distribute diff.info-* too.
+
+	* README, sdiff.c: version number now matches version.c.
+
+	* configure: Fix and document vfork test.
+
+	* ifdef.c: Don't dump core if `diff -Dx f f'.
+
+Mon Dec 23 23:36:08 1991  David J. MacKenzie  (djm at wookumz.gnu.ai.mit.edu)
+
+	* diff.h, diff3.c, sdiff.c: Change POSIX ifdefs to
+	HAVE_UNISTD_H and _POSIX_VERSION.
+
+Wed Dec 18 17:00:31 1991  David J. MacKenzie  (djm at wookumz.gnu.ai.mit.edu)
+
+	* Makefile.in (srcs): Add sdiff.c.
+	(tapefiles): Add diff.texi and diff.info.
+
+	* diff.h, diff3.c, sdiff.c: Use HAVE_VFORK_H instead of
+	VFORK_HEADER and VFORK_WORKS.
+
+Tue Dec 17 00:02:59 1991  Paul Eggert  (eggert at yata.uucp)
+
+	* Makefile.in (all): Add diff.info, sdiff.
+
+	* configure, diff.c, sdiff.c:
+	Prefix long options with `--', not `+'.
+	* diff.c: Regularize option names.
+
+	* configure: Fix check for vfork.
+	* configure, diff.c, diff.h, diff3.c, sdiff.c:
+	Use Posix definitions when possible.
+
+	* context.c: Align context with tab if -T is given.  Tune.
+	* diff.c, diff.h, side.c: Calculate column widths so that tabs line up.
+	* io.c: Add distinction between white space and printing chars.
+	* side.c: Don't expand tabs unless -t is given.
+	* side.c, util.c: Tab expansion now knows about '\b', '\f', '\r', '\v'.
+	* util.c: -w skips all white space.  Remove lint.  Tune.
+
+	* sdiff.c: Support many more diff options, e.g. `-', `sdiff file dir'.
+	Ignore interrupts while the subsidiary editor is in control.
+	Clean up temporary file and kill subsidiary diff if interrupted.
+	Ensure subsidiary diff doesn't ignore SIGPIPE.
+	Don't get confused while waiting for two subprocesses.
+	Don't let buffers overflow.  Check for I/O errors.
+	Convert to GNU style.  Tune.
+
+	* sdiff.c, util.c: Don't lose errno.
+	Don't confuse sdiff with messages like `Binary files differ'.
+	* sdiff.c, side.c: Don't assume that common lines are identical.
+	Simplify --sdiff-merge-assist format.
+
+Mon Sep 16 16:42:01 1991  Tom Lord  (lord at churchy.gnu.ai.mit.edu)
+
+	* Makefile.in, sdiff.c: introduced sdiff front end to diff.
+
+	* Makefile.in, analyze.c, diff.c, diff.h, io.c, side.c: Added
+	sdiff-style output format to diff.
+
+Mon Aug 26 16:44:55 1991  David J. MacKenzie  (djm at pogo.gnu.ai.mit.edu)
+
+	* Makefile.in, configure: Only put $< in Makefile if using VPATH,
+	because older makes don't understand it.
+
+Fri Aug  2 12:22:30 1991  David J. MacKenzie  (djm at apple-gunkies)
+
+	* configure: Create config.status.  Remove it and Makefile if
+	interrupted while creating them.
+
+Thu Aug  1 22:24:31 1991  David J. MacKenzie  (djm at apple-gunkies)
+
+	* configure: Check for +srcdir etc. arg and look for
+	Makefile.in in that directory.  Set VPATH if srcdir is not `.'.
+	* Makefile.in: Get rid of $(archpfx).
+
+Tue Jul 30 21:28:44 1991  Richard Stallman  (rms at mole.gnu.ai.mit.edu)
+
+	* Makefile.in (prefix): Renamed from DESTDIR.
+
+Wed Jul 24 23:08:56 1991  David J. MacKenzie  (djm at wookumz.gnu.ai.mit.edu)
+
+	* diff.h, diff3.c: Rearrange ifdefs to use POSIX,
+	STDC_HEADERS, VFORK_MISSING, DIRENT.  This way it works on
+	more systems that aren't pure USG or BSD.
+	Don't not define const if __GNUC__ is defined -- that would
+	break with -traditional.
+	* configure: Check for those features.
+
+Wed Jul 10 01:39:23 1991  David J. MacKenzie  (djm at wookumz.gnu.ai.mit.edu)
+
+	* configure, Makefile.in: $(INSTALLPROG) -> $(INSTALL).
+
+Sat Jul  6 16:39:04 1991  David J. MacKenzie  (djm at geech.gnu.ai.mit.edu)
+
+	* Replace Makefile with configure and Makefile.in.
+	Update README with current compilation instructions.
+
+Sat Jul  6 14:03:29 1991  Richard Stallman  (rms at mole.gnu.ai.mit.edu)
+
+	* util.c (setup_output): Just save the args for later use.
+	(begin_output): Do the real work, with the values that were saved.
+	It's safe to call begin_output more than once.
+	Print the special headers for context format here.
+	* analyze.c (diff_2_files): Don't print special headers here.
+	* context.c (pr_context_hunk, pr_unidiff_hunk): Call begin_output.
+	* ed.c (print_ed_hunk, print_forward_ed_hunk, print_rcs_hunk):
+	* normal.c (print_normal_hunk): Likewise.
+	* ifdef.c (print_ifdef_hunk): Likewise.
+	* util.c (finish_output): Don't die if begin_output was not called.
+
+Thu Jun 20 23:10:01 1991  David J. MacKenzie  (djm at geech.gnu.ai.mit.edu)
+
+	* Makefile: Add TAGS, distclean, and realclean targets.
+	Set SHELL.
+
+Tue Apr 30 13:54:36 1991  Richard Stallman  (rms at mole.gnu.ai.mit.edu)
+
+	* diff.h (TRUE, FALSE): Undefine these before defining.
+
+Thu Mar 14 18:27:27 1991  Richard Stallman  (rms@mole.ai.mit.edu)
+
+	* Makefile (objs): Include $(ALLOCA).
+
+Sat Mar  9 22:34:03 1991  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* diff.h: Include regex.h.
+
+Thu Feb 28 18:59:53 1991  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* Makefile (diff3): Link with GNU getopt.
+
+Sat Feb 23 12:49:43 1991  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* io.c (find_equiv_class): Make hash code unsigned before mod.
+
+	* diff.h (files): Add EXTERN.
+
+Sun Jan 13 21:33:01 1991  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* diff.c: +print option renamed +paginate.  Remove +all-text.
+
+Mon Jan  7 06:18:01 1991  David J. MacKenzie  (djm at geech.ai.mit.edu)
+
+	* Makefile (dist): New target, replacing diff.tar and
+	diff.tar.Z, to encode version number in distribution directory
+	and tar file names.
+
+Sun Jan  6 18:42:23 1991  Michael I Bushnell  (mib at geech.ai.mit.edu)
+
+	* Version 1.15 released.
+
+	* version.c: Updated from 1.15 alpha to 1.15
+
+	* context.c (print_context_number_range,
+	print_unidiff_number_range): Don't print N,M when N=M, print
+	just N instead.
+
+	* README: Updated for version 1.15.
+	Makefile: Updated for version 1.15.
+
+	* diff3.c (main): Don't get confused if one of the arguments
+	is a directory.
+
+	* diff.c (compare_files): Don't get confused if comparing
+	standard input to a directory; print error instead.
+
+	* analyze.c (diff_2_files), context.c (print_context_header,
+	print_context_script), diff.c (main), diff.h (enum
+	output_style): Treat unidiff as an output style in its own
+	right.  This also generates an error when both -u and -c are
+	given.
+
+	* diff.c (main): Better error messages when regexps are bad.
+
+	* diff.c (compare_files): Don't assume stdin is opened.
+
+	* diff3.c (read_diff): Don't assume things about the order of
+	descriptor assignment and closes.
+
+	* util.c (setup_output): Don't assume things about the order
+	of descriptor assignment and closes.
+
+	* diff.c (compare_files): Set a flag so that closes don't
+	happen more than once.
+
+	* diff.c (main): Don't just flush stdout, do a close.  That
+	way on broken systems we can still get errors.
+
+Mon Dec 24 16:24:17 1990  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* diff.c (usage): Use = for args of long options.
+
+Mon Dec 17 18:19:20 1990  Michael I Bushnell  (mib at geech.ai.mit.edu)
+
+	* context.c (print_context_label): Labels were interchanged badly.
+
+	* context.c (pr_unidiff_hunk): Changes to deal with files
+	ending in incomplete lines.
+	* util.c (print_1_line): Other half of the changes.
+
+Mon Dec  3 14:23:55 1990  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* diff.c (longopts, usage): unidiff => unified.
+
+Wed Nov  7 17:13:08 1990  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* analyze.c (diff_2_files): No warnings about newlines for -D.
+
+	* diff.c (pr_unidiff_hunk): Remove ref to output_patch_flag.
+
+Tue Oct 23 23:19:18 1990  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* diff.c (compare_files): For -D, compare even if args are same file.
+	* analyze.c (diff_2_files): Likewise.
+	Also, output even if files have no differences.
+
+	* analyze.c (diff_2_files): Print missing newline messages last.
+	Return 2 if a newline is missing.
+	Print them even if files end with identical text.
+
+Mon Oct 22 19:40:09 1990  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* diff.c (usage): Return 2.
+
+Wed Oct 10 20:54:04 1990  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* diff.c (longopts): Add +new-files.
+
+Sun Sep 23 22:49:29 1990  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* context.c (print_context_script): Handle unidiff_flag.
+	(print_context_header): Likewise.
+	(print_unidiff_number_range, pr_unidiff_hunk): New functions.
+	* diff.c (longopts): Add element for +unidiff.
+	(main): Handle +unidiff and -u.
+	(usage): Mention them.
+
+Wed Sep  5 16:33:22 1990  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* io.c (find_and_hash_each_line): Deal with missing final newline
+	after buffering necessary context lines.
+
+Sat Sep  1 16:32:32 1990  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* io.c (find_identical_ends): ROBUST_OUTPUT_FORMAT test was backward.
+
+Thu Aug 23 17:17:20 1990  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* diff3.c (WIFEXITED): Undef it if WEXITSTATUS is not defined.
+	* context.c (find_function): Don't try to return values.
+
+Wed Aug 22 11:54:39 1990  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* diff.h (O_RDONLY): Define if not defined.
+
+Tue Aug 21 13:49:26 1990  Richard Stallman  (rms at mole.ai.mit.edu)
+
+	* Handle -L option.
+	* context.c (print_context_label): New function.
+	(print_context_header): Use that.
+	* diff.c (main): Recognize the option.
+	(usage): Updated.
+	* diff.h (file_label): New variable.
+	* diff3.c (main): Recognize -L instead of -t.
+
+	* diff3.c (main): Support -m without other option.
+
+	* diff3.c (WEXITSTATUS, WIFEXITED): Define whenever not defined.
+
+	* diff3.c (bcopy, index, rindex): Delete definitions; not used.
+	(D_LINENUM, D_LINELEN): Likewise.
+	(struct diff_block): lengths includes newlines.
+	(struct diff3_block): Likewise.
+	(always_text, merge): New variables.
+	(read_diff): Return address of end, not size read.  Calls changed.
+	Pass -a to diff if given to diff3.
+	current_chunk_size now an int.  Detect error in `pipe'.
+	Check for incomplete line of output here.
+	(scan_diff_line): Don't make scan_ptr + 2 before knowing it is valid.
+	No need to check validity of diff output here.
+	Include newline in length of line.
+	(main): Compute rev_mapping here.  Handle -a and -m.
+	Error message if excess -t operands.  Error for incompatible options.
+	Error if `-' given more than once.
+	Fix error storing in tag_strings.
+	(output_diff3): REV_MAPPING is now an arg.  Call changed.
+	Change syntax of "missing newline" message.
+	Expect length of line to include newline.
+	(output_diff3_edscript): Return just 0 or 1.
+	REV_MAPPING is now an arg.  Call changed.
+	(output_diff3_merge): New function.
+	(process_diff): Better error message for bad diff format.
+	(fatal, perror_with_exit): Return status 2.
+
+	* analyze.c (diff_2_files): Report missing newline in either
+	or both files, if not robust output style.
+
+	* util.c (setup_output): Detect error from pipe.
+	No need to close stdin.
+
+	* util.c (print_1_line): Change format of missing-newline msg.
+	Change if statements to switch.
+
+	* io.c (slurp): Don't mention differences in final newline if -B.
+
+	* io.c (binary_file_p): Use ISO char set as criterion, not ASCII.
+
+	* io.c (find_identical_ends): Increase value of BEG0 by 1.
+	Other changes in backwards scan to avoid decrementing pointers
+	before start of array, and set LINES properly.
+
+	* diff.h (ROBUST_OUTPUT_STYLE): New macro.
+	* io.c (find_identical_ends, find_and_hash_each_line): Use that macro.
+
+	* diff.h (dup2): Don't define if XENIX.
+
+	* diff.c (main): Check for write error at end.
+
+	* context.c (find_function): Don't return a value.
+	Use argument FILE rather than global files.
+
+	* analyze.c: Add external function declarations.
+	* analyze.c (build_script): Turn off explicit check for final newline.
+
+	* analyze.c (discard_confusing_lines): Make integers unsigned.
+
+Tue Jul 31 21:37:16 1990  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* io.c (find_and_hash_each_line): Correct the criterion
+	for leaving out the newline from the end of the line.
+
+Tue May 29 21:28:16 1990  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* dir.c (diff_dirs): Free things only if nonzero.
+
+Mon Apr 16 18:31:05 1990  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff.h (NDIR_IN_SYS): New macro controls location of ndir.h.
+
+	* diff3.c (xmalloc, xrealloc): Don't die if size == 0 returns 0.
+
+Sun Mar 25 15:58:42 1990  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* analyze.c (discard_confusing_lines):
+	`many' wasn't being used; use it.
+	Cancelling provisionals near start of run must handle already
+	cancelled provisionals.
+	Cancelling subruns of provisionals was cancelling last nonprovisional.
+
+Sat Mar 24 14:02:51 1990  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* analyze.c (discard_confusing_lines):
+	Threshold for line occurring many times scales by square root
+	of total lines.
+	Within each run, cancel any long subrun of provisionals.
+	Don't update `provisional' while cancelling provisionals.
+	In big outer loop, handle provisional and nonprovisional separately.
+
+Thu Mar 22 16:35:33 1990  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* analyze.c (discard_confusing_lines):
+	The first loops to discard provisionals from ends failed to step.
+	In second such loops, keep discarding all consecutive provisionals.
+	Increase threshold for stopping discarding, and also check for
+	consecutive nondiscardables as separate threshold.
+
+Fri Mar 16 00:33:08 1990  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff3.c (read_diff): Pass -- as first arg to diff.
+
+	* diff3.c: Include wait.h or define equivalent macros.
+	(read_diff): Don't use stdio printing error in the inferior.
+	Remember the pid and wait for it.  Report failing status.
+	Report failure of vfork.
+
+Sun Mar 11 17:10:32 1990  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff3.c (main): Accept -t options and pass to output_diff3_edscript.
+	(usage): Mention -t.
+	(read_diff): Use vfork.
+	(vfork): Don't use it on Sparc.
+
+	* diff.h (vfork): Don't use it on Sparc.
+
+Tue Mar  6 22:37:20 1990  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff3.c (dup2): Don't define on Xenix.
+
+	* Makefile: Comments for Xenix.
+
+Thu Mar  1 17:19:23 1990  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* analyze.c (diff_2_files): `message' requires three args.
+
+Fri Feb 23 10:56:50 1990  David J. MacKenzie  (djm at albert.ai.mit.edu)
+
+	* diff.h, util.c, diff3.c: Change 'void *' to 'VOID *', with
+	VOID defined as void if __STDC__, char if not.
+
+Sun Feb 18 20:31:58 1990  David J. MacKenzie  (djm at albert.ai.mit.edu)
+
+	* Makefile: Add rules for getopt.c, getopt1.c, getopt.h.
+
+	* getopt.c, getopt.h, getopt1.c: New files.
+
+	* main.c (main, usage): Add long options.
+
+	* analyze.c (shift_boundaries): Remove unused var 'j_end'.
+
+Thu Feb  8 02:43:16 1990  Jim Kingdon  (kingdon at pogo.ai.mit.edu)
+
+	* GNUmakefile: include ../Makerules before Makefile.
+
+Fri Feb  2 23:21:38 1990  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* analyze.c (diff_2_files): If -B or -I, don't return 1
+	if all changes were ignored.
+
+Wed Jan 24 20:43:57 1990  Richard Stallman  (rms at albert.ai.mit.edu)
+
+	* diff3.c (fatal): Output to stderr.
+
+Thu Jan 11 00:25:56 1990  David J. MacKenzie  (djm at hobbes.ai.mit.edu)
+
+	* diff.c (usage): Mention -v.
+
+Wed Jan 10 16:06:38 1990  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff3.c (output_diff3_edscript): Return number of overlaps.
+	(main): If have overlaps, exit with status 1.
+
+Sun Dec 24 10:29:20 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* io.c (find_equiv_class): Fix typo that came from changing init of B
+	to an assignment.
+
+	* version.c: New file.
+	* diff.c (main): -v prints version number.
+
+	* io.c (binary_file_p): Null char implies binary file.
+
+Fri Nov 17 23:44:55 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* util.c (print_1_line): Fix off by 1 error.
+
+Thu Nov 16 13:51:10 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* util.c (xcalloc): Function deleted.
+
+	* io.c (slurp): Null-terminate the buffer.
+
+	* io.c (read_files): Delete unused vars.
+
+	* io.c (find_equiv_class): Don't index by N if too low.
+
+	* dir.c (dir_sort): Delete the extra declaration of compare_names.
+
+	* diff.h: Don't declare xcalloc.  Declare some other functions.
+
+	* analyze.c (shift_boundaries):
+	Test for END at end of range before indexing by it.
+	Fix typo `preceeding' in var names.
+
+Sat Nov 11 14:04:16 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff3.c (using_to_diff3_block): Delete unused vars.
+	(make_3way_diff, process_diff_control, read_diff, output_diff3): Likewise.
+
+Mon Nov  6 18:15:50 EST 1989 Jay Fenlason (hack@ai.mit.edu)
+
+	* README: Fix typo.
+
+Fri Nov  3 15:27:47 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff.c (usage): Mention -D.
+
+	* ifdef.c (print_ifdef_hunk): Write comments on #else and #endif.
+
+Sun Oct 29 16:41:07 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff.c (compare_files): Don't fflush for identical files.
+
+Wed Oct 25 17:57:12 1989  Randy Smith  (randy at apple-gunkies.ai.mit.edu)
+
+	* diff3.c (using_to_diff3_block): When defaulting lines from
+	FILE0, only copy up to just under the *lowest* line mentioned
+	in the next diff.
+
+	* diff3.c (fatal): Add \n to error messages.
+
+Wed Oct 25 15:05:49 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* Makefile (tapefiles): Add ChangeLog.
+
+Tue Oct  3 00:51:17 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff3.c (process_diff, create_diff3_block): Init ->next field.
+
+Fri Sep 29 08:16:45 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* util.c (line_cmp): Alter end char of line 2, not line 1.
+
+Wed Sep 20 00:12:37 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* Makefile (diff.tar): Expect ln to fail on some files;
+	copy them with cp.
+
+Mon Sep 18 02:54:29 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* Handle -D option:
+	* io.c (find_and_hash_each_line): Keep all lines of 1st file.
+	* diff.c (main): Handle -D option.
+	(compare_files): Reject -D if files spec'd are directories.
+	* analyze.c (diff_2_files): Handle OUTPUT_IFDEF case.
+
+Fri Sep  1 20:15:50 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff.c (option_list): Rename arg VECTOR as OPTIONVEC.
+
+Mon Aug 28 17:58:27 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff.c (compare_files): Clear entire inf[i].stat.
+
+Wed Aug 23 17:48:47 1989  Richard Stallman  (rms at apple-gunkies.ai.mit.edu)
+
+	* io.c (find_identical_ends): Sign was backward
+	determining where to bound the scan for the suffix.
+
+Wed Aug 16 12:49:16 1989  Richard Stallman  (rms at hobbes.ai.mit.edu)
+
+	* analyze.c (diff_2_files): If -q, treat all files as binary.
+	* diff.c (main): Detect -q, record in no_details_flag.
+
+Sun Jul 30 23:12:00 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff.c (usage): New function.
+	(main): Call it.
+
+Wed Jul 26 02:02:19 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff.c (main): Make -C imply -c.
+
+Thu Jul 20 17:57:51 1989  Chris Hanson  (cph at kleph)
+
+	* io.c (find_and_hash_each_line): Bug fix in context handling,
+	introduced by last change.
+
+Fri Jul 14 17:39:20 1989  Chris Hanson  (cph at kleph)
+
+	* analyze.c: To make RCS work correctly on files that don't
+	necessarily end in newline, introduce some changes that cause
+	diffs to be sensitive to missing final newline.  Because
+	non-RCS modes don't want to be affected by these changes, they
+	are conditional on `output_style == OUTPUT_RCS'.
+	(diff_2_files) [OUTPUT_RCS]: Suppress the "File X missing
+	newline" message.
+	(build_script) [OUTPUT_RCS]: Cause the last line to compare as
+	different if exactly one of the files is missing its final
+	newline.
+
+	* io.c (find_and_hash_each_line): Bug fix in
+	ignore_space_change mode.  Change line's length to include the
+	newline.  For OUTPUT_RCS, decrement last line's length if
+	there is no final newline.
+	(find_identical_ends) [OUTPUT_RCS]: If one of the files is
+	missing a final newline, make sure it's not included in either
+	the prefix or suffix.
+
+	* util.c (print_1_line): Change line output routine to account
+	for line length including the newline.
+
+Tue Jun 27 02:35:28 1989  Roland McGrath  (roland at hobbes.ai.mit.edu)
+
+	* Makefile: Inserted $(archpfx) where appropriate.
+
+Wed May 17 20:18:43 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff3.c [USG]: Include fcntl.h.
+
+	* diff.h [USG]: New compilation flags HAVE_NDIR, HAVE_DIRECT.
+
+Wed Apr 26 15:35:57 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* dir.c (diff_dirs): Two new args, NONEX1 and NONEX2, say to pretend
+	nonex dirs are empty.
+	(dir_sort): New arg NONEX, likewise.
+	* diff.c (compare_files): Pass those args.
+	Sometimes call diff_dirs if subdir exists in just one place.
+
+Wed Apr 12 01:10:27 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* io.c (find_identical_ends): Set END0 *after* last char
+	during backward scan for suffix.
+
+Sat Apr  8 15:49:49 1989  Randall Smith  (randy at apple-gunkies.ai.mit.edu)
+
+	* diff3.c (using_to_diff3_block): Now find high marks in files 1
+	and 2 through mapping off of the last difference instead of the
+	first.
+
+	* diff3.c: Many trivial changes to spelling inside comments.
+
+Fri Feb 24 12:38:03 1989  Randall Smith  (randy at gluteus.ai.mit.edu)
+
+	* util.c, normal.c, io.c, ed.c, dir.c, diff.h, diff.c, context.c,
+	analyze.c, Makefile: Changed copyright header to conform with new
+	GNU General Public license.
+	* diff3.c: Changed copyright header to conform with new GNU
+	General Public license.
+	* COPYING: Made a hard link to /gp/rms/COPYING.
+
+Fri Feb 24 10:01:58 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* io.c (slurp): Leave 2 chars space at end of buffer, not one.
+	(find_identical_ends): Special case if either file is empty;
+	don't try to make a sentinel since could crash.
+
+Wed Feb 15 14:24:48 1989  Jay Fenlason  (hack at apple-gunkies.ai.mit.edu)
+
+	* diff3.c (message)  Re-wrote routine to avoid using alloca()
+
+Wed Feb 15 06:19:14 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* io.c (find_identical_ends): Delete the variable `bytes'.
+
+Sun Feb 12 11:50:36 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* io.c (slurp): ->bufsize is nominal amount we have room for;
+	add room for sentinel when calling xmalloc or xrealloc.
+
+	* io.c (find_identical_ends): Do need overrun check in finding suffix.
+
+Fri Feb 10 01:28:15 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff.c (main): -C now takes arg to specify context length.
+	Now -p to show C function name--Damned IEEE!
+	Fatal error if context length spec'd twice.
+
+	* ed.c (print_ed_hunk): Now special treatment only for lines containing
+	precisely a dot and nothing else.  Output `..', end the insert,
+	substitute that one line, then resume the insert if nec.
+
+	* io.c (find_and_hash_lines): When backing up over starting context,
+	don't move past buffer-beg.
+
+	* io.c (find_identical_ends): Use sentinels to make the loops faster.
+	If files are identical, skip the 2nd loop and return quickly.
+	(slurp): Leave 1 char extra space after each buffer.
+
+	* analyze.c (diff_2_files): Mention difference in final newlines.
+
+Wed Jan 25 22:44:44 1989  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* dir.c (diff_dirs): Use * when calling fcn ptr variable.
+
+Sat Dec 17 14:12:06 1988  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* Makefile: New vars INSTALL and LIBS used in some rules;
+	provide default defns plus commented-out defns for sysV.
+
+Thu Nov 17 16:42:53 1988  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* dir.c (dir_sort): Open-trouble not fatal; just say # files is -1.
+	(diff_dirs): If dir_sort does that, give up and return 2.
+
+	* diff.c (compare_files): Don't open directories.
+	Don't close them specially either.
+	Cross-propagate inf[i].dir_p sooner.
+
+Sun Nov 13 11:19:36 1988  Richard Stallman  (rms at sugar-bombs.ai.mit.edu)
+
+	* diff.h: Declare index, rindex.
+
+	* diff.c (compare_files): If comparing foodir with b/f,
+	use foodir/f, not foodir/b/f.
+
+	* diff.c (compare_files): Don't print "are identical" msg for 2 dirs.
+	Status now 1 if one file is a dir and the other isn't, etc.
+
+Thu Nov  3 16:30:24 1988  Randall Smith  (randy at gluteus.ai.mit.edu)
+
+	* Makefile: Added a define for diff3 to define DIFF_PROGRAM.
+
+	* util.c: Added hack to make sure that perror was not called with
+	a null pointer.
+
+	* diff.c: Changed S_IFDIR to S_IFMT in masking type of file bits
+	out.
+
+	* diff3.c: Included USG compatibility defines.
+
+	* diff.h: Moved sys/file.h into #else USG section (not needed or
+	wanted on System V).
+
+	* ed.c, analyze.c, context.c: Shortened names to 12 characters for
+	the sake of System V (too simple not to do).
+
+Local Variables:
+mode: indented-text
+left-margin: 8
+version-control: never
+End:

+ 24 - 0
sys/src/ape/cmd/diff/FREEBSD-upgrade

@@ -0,0 +1,24 @@
+Import of GNU diff 2.7
+
+Original source available as ftp://prep.ai.mit.edu/pub/gnu/diffutils-2.7.tar.gz
+
+The following files and directories were removed for this import:
+
+Makefile.in
+INSTALL
+alloca.c
+cmp.c
+diff.info
+diff.info-1
+diff.info-2
+diff.info-3
+diff.info-4
+error.c
+fnmatch.c
+fnmatch.h
+memchr.c
+mkinstalldirs
+regex.c
+regex.h
+texinfo.tex
+waitpid.c

+ 126 - 0
sys/src/ape/cmd/diff/NEWS

@@ -0,0 +1,126 @@
+User-visible changes in version 2.7:
+
+* New diff option: --binary (useful only on non-Posix hosts)
+* diff -b and -w now ignore line incompleteness; -B no longer does this.
+* cmp -c now uses locale to decide which output characters to quote.
+* Help and version messages are reorganized.
+
+
+User-visible changes in version 2.6:
+
+* New cmp, diff, diff3, sdiff option: --help
+* A new heuristic for diff greatly reduces the time needed to compare
+  large input files that contain many differences.
+* Partly as a result, GNU diff's output is not exactly the same as before.
+  Usually it is a bit smaller, but sometimes it is a bit larger.
+
+
+User-visible changes in version 2.5:
+
+* New cmp option: -v --version
+
+
+User-visible changes in version 2.4:
+
+* New cmp option: --ignore-initial=BYTES
+* New diff3 option: -T --initial-tab
+* New diff option: --line-format=FORMAT
+* New diff group format specifications:
+  <PRINTF_SPEC>[eflmnEFLMN]
+      A printf spec followed by one of the following letters
+      causes the integer corresponding to that letter to be
+      printed according to the printf specification.
+      E.g. `%5df' prints the number of the first line in the
+      group in the old file using the "%5d" format.
+	e: line number just before the group in old file; equals f - 1
+	f: first line number in group in the old file
+	l: last line number in group in the old file
+	m: line number just after the group in old file; equals l + 1
+	n: number of lines in group in the old file; equals l - f + 1
+	E, F, L, M, N: likewise, for lines in the new file
+  %(A=B?T:E)
+      If A equals B then T else E.  A and B are each either a decimal
+      constant or a single letter interpreted as above.  T and E are
+      arbitrary format strings.  This format spec is equivalent to T if
+      A's value equals B's; otherwise it is equivalent to E.  For
+      example, `%(N=0?no:%dN) line%(N=1?:s)' is equivalent to `no lines'
+      if N (the number of lines in the group in the new file) is 0,
+      to `1 line' if N is 1, and to `%dN lines' otherwise.
+  %c'C'
+      where C is a single character, stands for the character C.  C may not
+      be a backslash or an apostrophe.  E.g. %c':' stands for a colon.
+  %c'\O'
+      where O is a string of 1, 2, or 3 octal digits, stands for the
+      character with octal code O.  E.g. %c'\0' stands for a null character.
+* New diff line format specifications:
+  <PRINTF_SPEC>n
+      The line number, printed with <PRINTF_SPEC>.
+      E.g. `%5dn' prints the line number with a "%5d" format.
+  %c'C'
+  %c'\O'
+      The character C, or with octal code O, as above.
+* Supported <PRINTF_SPEC>s have the same meaning as with printf, but must
+  match the extended regular expression %-*[0-9]*(\.[0-9]*)?[doxX].
+* The format spec %0 introduced in version 2.1 has been removed, since it
+  is incompatible with printf specs like %02d.  To represent a null char,
+  use %c'\0' instead.
+* cmp and diff now conform to Posix.2 (ISO/IEC 9945-2:1993)
+  if the underlying system conforms to Posix:
+  - Some messages' wordings are changed in minor ways.
+  - ``White space'' is now whatever C's `isspace' says it is.
+  - When comparing directories, if `diff' finds a file that is not a regular
+    file or a directory, it reports the file's type instead of diffing it.
+    (As usual, it follows symbolic links first.)
+  - When signaled, sdiff exits with the signal's status, not with status 2.
+* Now portable to hosts where int, long, pointer, etc. are not all the same
+  size.
+* `cmp - -' now works like `diff - -'.
+
+
+User-visible changes in version 2.3:
+
+* New diff option: --horizon-lines=lines
+
+
+User-visible changes in version 2.1:
+
+* New diff options:
+  --{old,new,unchanged}-line-format='format'
+  --{old,new,unchanged,changed}-group-format='format'
+  -U
+* New diff3 option:
+  -A --show-all
+* diff3 -m now defaults to -A, not -E.
+* diff3 now takes up to three -L or --label options, not just two.
+  If just two options are given, they refer to the first two input files,
+  not the first and third input files.
+* sdiff and diff -y handle incomplete lines.
+
+
+User-visible changes in version 2.0:
+
+* Add sdiff and cmp programs.
+* Add Texinfo documentation.
+* Add configure script.
+* Improve diff performance.
+* New diff options:
+-x --exclude
+-X --exclude-from
+-P --unidirectional-new-file
+-W --width
+-y --side-by-side
+--left-column
+--sdiff-merge-assist
+--suppress-common-lines
+* diff options renamed:
+--label renamed from --file-label
+--forward-ed renamed from --reversed-ed
+--paginate renamed from --print
+--entire-new-file renamed from --entire-new-files
+--new-file renamed from --new-files
+--all-text removed
+* New diff3 options:
+-v --version
+* Add long-named equivalents for other diff3 options.
+* diff options -F (--show-function-line) and -I (--ignore-matching-lines)
+  can now be given more than once.

+ 9 - 0
sys/src/ape/cmd/diff/README

@@ -0,0 +1,9 @@
+This directory contains the GNU diff, diff3, sdiff, and cmp utilities.
+Their features are a superset of the Unix features and they are
+significantly faster.  cmp has been moved here from the GNU textutils.
+
+See the file COPYING for copying conditions.
+See the file diff.texi (or diff.info*) for documentation.
+See the file INSTALL for compilation and installation instructions.
+
+Report bugs to bug-gnu-utils@prep.ai.mit.edu

+ 1084 - 0
sys/src/ape/cmd/diff/analyze.c

@@ -0,0 +1,1084 @@
+/* Analyze file differences for GNU DIFF.
+   Copyright (C) 1988, 1989, 1992, 1993 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU DIFF; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/* The basic algorithm is described in:
+   "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
+   Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
+   see especially section 4.2, which describes the variation used below.
+   Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE
+   heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
+   at the price of producing suboptimal output for large inputs with
+   many differences.
+
+   The basic algorithm was independently discovered as described in:
+   "Algorithms for Approximate String Matching", E. Ukkonen,
+   Information and Control Vol. 64, 1985, pp. 100-118.  */
+
+#include "diff.h"
+#include "cmpbuf.h"
+
+/* Nonzero means discard_confusing_lines keeps every line instead of
+   discarding any.  Declared extern here, so it is defined in another
+   file.  */
+extern int no_discards;
+
+static int *xvec, *yvec;	/* Vectors being compared. */
+static int *fdiag;		/* Vector, indexed by diagonal, containing
+				   1 + the X coordinate of the point furthest
+				   along the given diagonal in the forward
+				   search of the edit matrix. */
+static int *bdiag;		/* Vector, indexed by diagonal, containing
+				   the X coordinate of the point furthest
+				   along the given diagonal in the backward
+				   search of the edit matrix. */
+static int too_expensive;	/* Edit scripts longer than this are too
+				   expensive to compute.  */
+
+/* Threshold used by `diag' both to flag a `big' snake and as the length
+   of matching run its progress heuristic insists on.  */
+#define SNAKE_LIMIT 20	/* Snakes bigger than this are considered `big'.  */
+
+/* Result of one call to `diag': the point (XMID, YMID) at which
+   `compareseq' splits a pair of subsequences, together with the
+   MINIMAL argument to pass when recursing into each half.  */
+struct partition
+{
+  int xmid, ymid;	/* Midpoints of this partition.  */
+  int lo_minimal;	/* Nonzero if low half will be analyzed minimally.  */
+  int hi_minimal;	/* Likewise for high half.  */
+};
+
+/* Forward declarations of this file's static functions.
+   NOTE(review): PARAMS is not defined in this file; presumably it comes
+   from diff.h and hides the prototype from pre-ANSI compilers -- confirm.  */
+static int diag PARAMS((int, int, int, int, int, struct partition *));
+static struct change *add_change PARAMS((int, int, int, int, struct change *));
+static struct change *build_reverse_script PARAMS((struct file_data const[]));
+static struct change *build_script PARAMS((struct file_data const[]));
+static void briefly_report PARAMS((int, struct file_data const[]));
+static void compareseq PARAMS((int, int, int, int, int));
+static void discard_confusing_lines PARAMS((struct file_data[]));
+static void shift_boundaries PARAMS((struct file_data[]));
+
+/* Find the midpoint of the shortest edit script for a specified
+   portion of the two files.
+
+   Scan from the beginnings of the files, and simultaneously from the ends,
+   doing a breadth-first search through the space of edit-sequence.
+   When the two searches meet, we have found the midpoint of the shortest
+   edit sequence.
+
+   If MINIMAL is nonzero, find the minimal edit script regardless
+   of expense.  Otherwise, if the search is too expensive, use
+   heuristics to stop the search and report a suboptimal answer.
+
+   Set PART->(XMID,YMID) to the midpoint (XMID,YMID).  The diagonal number
+   XMID - YMID equals the number of inserted lines minus the number
+   of deleted lines (counting only lines before the midpoint).
+   Return the approximate edit cost; this is the total number of
+   lines inserted or deleted (counting only lines before the midpoint),
+   unless a heuristic is used to terminate the search prematurely.
+
+   Set PART->lo_minimal to nonzero iff the minimal edit script for the
+   left half of the partition is known; similarly for PART->hi_minimal.
+
+   This function assumes that the first lines of the specified portions
+   of the two files do not match, and likewise that the last lines do not
+   match.  The caller must trim matching lines from the beginning and end
+   of the portions it is going to specify.
+
+   If we return the "wrong" partitions,
+   the worst this can do is cause suboptimal diff output.
+   It cannot cause incorrect diff output.  */
+
+static int
+diag (xoff, xlim, yoff, ylim, minimal, part)
+     int xoff, xlim, yoff, ylim, minimal;	/* Search [XOFF, XLIM) of xvec
+						   against [YOFF, YLIM) of yvec.  */
+     struct partition *part;			/* Receives the result.  */
+{
+  int *const fd = fdiag;	/* Give the compiler a chance. */
+  int *const bd = bdiag;	/* Additional help for the compiler. */
+  int const *const xv = xvec;	/* Still more help for the compiler. */
+  int const *const yv = yvec;	/* And more and more . . . */
+  int const dmin = xoff - ylim;	/* Minimum valid diagonal. */
+  int const dmax = xlim - yoff;	/* Maximum valid diagonal. */
+  int const fmid = xoff - yoff;	/* Center diagonal of top-down search. */
+  int const bmid = xlim - ylim;	/* Center diagonal of bottom-up search. */
+  int fmin = fmid, fmax = fmid;	/* Limits of top-down search. */
+  int bmin = bmid, bmax = bmid;	/* Limits of bottom-up search. */
+  int c;			/* Cost. */
+  int odd = (fmid - bmid) & 1;	/* True if southeast corner is on an odd
+				   diagonal with respect to the northwest. */
+
+  /* Seed the searches: the forward one starts at X == XOFF on diagonal
+     FMID, the backward one at X == XLIM on diagonal BMID.  */
+  fd[fmid] = xoff;
+  bd[bmid] = xlim;
+
+  /* Each iteration extends both searches by one edit step.  The loop has
+     no termination test; the only exits are the `return's below.  */
+  for (c = 1;; ++c)
+    {
+      int d;			/* Active diagonal. */
+      int big_snake = 0;
+
+      /* Extend the top-down search by an edit step in each diagonal. */
+      /* While the active range [FMIN, FMAX] can still grow inside
+	 [DMIN, DMAX], widen it and store a -1 sentinel just outside it;
+	 once at a boundary, narrow it instead.  */
+      fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin;
+      fmax < dmax ? fd[++fmax + 1] = -1 : --fmax;
+      for (d = fmax; d >= fmin; d -= 2)
+	{
+	  int x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1];
+
+	  /* Reach diagonal D from whichever neighbor gets further:
+	     coming from D - 1 advances X by one, coming from D + 1
+	     leaves X unchanged.  */
+	  if (tlo >= thi)
+	    x = tlo + 1;
+	  else
+	    x = thi;
+	  oldx = x;
+	  y = x - d;
+	  /* Follow the run of matching elements along diagonal D.  */
+	  while (x < xlim && y < ylim && xv[x] == yv[y])
+	    ++x, ++y;
+	  if (x - oldx > SNAKE_LIMIT)
+	    big_snake = 1;
+	  fd[d] = x;
+	  /* The forward pass tests for meeting the backward search only
+	     when ODD; the even case is tested in the backward pass.  */
+	  if (odd && bmin <= d && d <= bmax && bd[d] <= x)
+	    {
+	      part->xmid = x;
+	      part->ymid = y;
+	      part->lo_minimal = part->hi_minimal = 1;
+	      return 2 * c - 1;
+	    }
+	}
+
+      /* Similarly extend the bottom-up search.  */
+      /* Here the sentinel stored outside [BMIN, BMAX] is INT_MAX.  */
+      bmin > dmin ? bd[--bmin - 1] = INT_MAX : ++bmin;
+      bmax < dmax ? bd[++bmax + 1] = INT_MAX : --bmax;
+      for (d = bmax; d >= bmin; d -= 2)
+	{
+	  int x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1];
+
+	  /* Mirror image of the forward choice: take the smaller X,
+	     where coming from D + 1 moves X back by one.  */
+	  if (tlo < thi)
+	    x = tlo;
+	  else
+	    x = thi - 1;
+	  oldx = x;
+	  y = x - d;
+	  /* Follow the run of matching elements backwards.  */
+	  while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1])
+	    --x, --y;
+	  if (oldx - x > SNAKE_LIMIT)
+	    big_snake = 1;
+	  bd[d] = x;
+	  if (!odd && fmin <= d && d <= fmax && x <= fd[d])
+	    {
+	      part->xmid = x;
+	      part->ymid = y;
+	      part->lo_minimal = part->hi_minimal = 1;
+	      return 2 * c;
+	    }
+	}
+
+      /* When MINIMAL is set, none of the heuristics below may cut the
+	 search short.  */
+      if (minimal)
+	continue;
+
+      /* Heuristic: check occasionally for a diagonal that has made
+	 lots of progress compared with the edit distance.
+	 If we have any such, find the one that has made the most
+	 progress and return it as if it had succeeded.
+
+	 With this heuristic, for files with a constant small density
+	 of changes, the algorithm is linear in the file size.  */
+
+      /* `heuristic' is not declared in this file.
+	 NOTE(review): presumably an option flag from diff.h -- confirm.  */
+      if (c > 200 && big_snake && heuristic)
+	{
+	  int best;
+
+	  best = 0;
+	  for (d = fmax; d >= fmin; d -= 2)
+	    {
+	      int dd = d - fmid;
+	      int x = fd[d];
+	      int y = x - d;
+	      /* V equals (x - xoff) + (y - yoff): the number of elements
+		 of both sequences lying before this point.  */
+	      int v = (x - xoff) * 2 - dd;
+	      if (v > 12 * (c + (dd < 0 ? -dd : dd)))
+		{
+		  if (v > best
+		      && xoff + SNAKE_LIMIT <= x && x < xlim
+		      && yoff + SNAKE_LIMIT <= y && y < ylim)
+		    {
+		      /* We have a good enough best diagonal;
+			 now insist that it end with a significant snake.  */
+		      int k;
+
+		      /* The bounds tested just above keep x - k and y - k
+			 inside the region for every k up to SNAKE_LIMIT.  */
+		      for (k = 1; xv[x - k] == yv[y - k]; k++)
+			if (k == SNAKE_LIMIT)
+			  {
+			    best = v;
+			    part->xmid = x;
+			    part->ymid = y;
+			    break;
+			  }
+		    }
+		}
+	    }
+	  if (best > 0)
+	    {
+	      /* The split came from the forward search, so only the low
+		 half is reported as minimally analyzable.  */
+	      part->lo_minimal = 1;
+	      part->hi_minimal = 0;
+	      return 2 * c - 1;
+	    }
+
+	  best = 0;
+	  for (d = bmax; d >= bmin; d -= 2)
+	    {
+	      int dd = d - bmid;
+	      int x = bd[d];
+	      int y = x - d;
+	      /* V equals (xlim - x) + (ylim - y): the number of elements
+		 of both sequences lying at or after this point.  */
+	      int v = (xlim - x) * 2 + dd;
+	      if (v > 12 * (c + (dd < 0 ? -dd : dd)))
+		{
+		  if (v > best
+		      && xoff < x && x <= xlim - SNAKE_LIMIT
+		      && yoff < y && y <= ylim - SNAKE_LIMIT)
+		    {
+		      /* We have a good enough best diagonal;
+			 now insist that it end with a significant snake.  */
+		      int k;
+
+		      for (k = 0; xv[x + k] == yv[y + k]; k++)
+			if (k == SNAKE_LIMIT - 1)
+			  {
+			    best = v;
+			    part->xmid = x;
+			    part->ymid = y;
+			    break;
+			  }
+		    }
+		}
+	    }
+	  if (best > 0)
+	    {
+	      /* Split came from the backward search: the roles of the
+		 two halves are swapped relative to the case above.  */
+	      part->lo_minimal = 0;
+	      part->hi_minimal = 1;
+	      return 2 * c - 1;
+	    }
+	}
+
+      /* Heuristic: if we've gone well beyond the call of duty,
+	 give up and report halfway between our best results so far.  */
+      if (c >= too_expensive)
+	{
+	  int fxybest, fxbest;
+	  int bxybest, bxbest;
+
+	  fxbest = bxbest = 0;  /* Pacify `gcc -Wall'.  */
+
+	  /* Find forward diagonal that maximizes X + Y.  */
+	  fxybest = -1;
+	  for (d = fmax; d >= fmin; d -= 2)
+	    {
+	      /* Clamp the point to the region before scoring it.  */
+	      int x = min (fd[d], xlim);
+	      int y = x - d;
+	      if (ylim < y)
+		x = ylim + d, y = ylim;
+	      if (fxybest < x + y)
+		{
+		  fxybest = x + y;
+		  fxbest = x;
+		}
+	    }
+
+	  /* Find backward diagonal that minimizes X + Y.  */
+	  bxybest = INT_MAX;
+	  for (d = bmax; d >= bmin; d -= 2)
+	    {
+	      /* Clamp the point to the region before scoring it.  */
+	      int x = max (xoff, bd[d]);
+	      int y = x - d;
+	      if (y < yoff)
+		x = yoff + d, y = yoff;
+	      if (x + y < bxybest)
+		{
+		  bxybest = x + y;
+		  bxbest = x;
+		}
+	    }
+
+	  /* Use the better of the two diagonals.  */
+	  /* Each side of the comparison is the distance that search has
+	     covered from its own corner; the *XYBEST values hold X + Y,
+	     so subtracting the X part recovers YMID.  */
+	  if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))
+	    {
+	      part->xmid = fxbest;
+	      part->ymid = fxybest - fxbest;
+	      part->lo_minimal = 1;
+	      part->hi_minimal = 0;
+	    }
+	  else
+	    {
+	      part->xmid = bxbest;
+	      part->ymid = bxybest - bxbest;
+	      part->lo_minimal = 0;
+	      part->hi_minimal = 1;
+	    }
+	  return 2 * c - 1;
+	}
+    }
+}
+
+/* Compare in detail contiguous subsequences of the two files
+   which are known, as a whole, to match each other.
+
+   The results are recorded in the vectors files[N].changed_flag, by
+   storing a 1 in the element for each line that is an insertion or deletion.
+
+   The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
+
+   Note that XLIM, YLIM are exclusive bounds.
+   All line numbers are origin-0 and discarded lines are not counted.
+ 
+   If MINIMAL is nonzero, find a minimal difference no matter how
+   expensive it is.  */
+
+static void
+compareseq (xoff, xlim, yoff, ylim, minimal)
+     int xoff, xlim, yoff, ylim, minimal;
+{
+  int const *const seq0 = xvec;	/* Vector compared for file 0.  */
+  int const *const seq1 = yvec;	/* Vector compared for file 1.  */
+  struct partition split;
+  int cost;
+
+  /* Strip the common prefix of the two subsequences.  */
+  while (xoff < xlim && yoff < ylim && seq0[xoff] == seq1[yoff])
+    {
+      xoff++;
+      yoff++;
+    }
+
+  /* Strip the common suffix likewise.  */
+  while (xlim > xoff && ylim > yoff && seq0[xlim - 1] == seq1[ylim - 1])
+    {
+      xlim--;
+      ylim--;
+    }
+
+  /* Nothing left of file 0: whatever remains of file 1 is inserted.  */
+  if (xoff == xlim)
+    {
+      for (; yoff < ylim; yoff++)
+	files[1].changed_flag[files[1].realindexes[yoff]] = 1;
+      return;
+    }
+
+  /* Nothing left of file 1: whatever remains of file 0 is deleted.  */
+  if (yoff == ylim)
+    {
+      for (; xoff < xlim; xoff++)
+	files[0].changed_flag[files[0].realindexes[xoff]] = 1;
+      return;
+    }
+
+  /* Both sides are non-empty and differ at both ends: ask `diag' for a
+     point of correspondence in the middle.  */
+  cost = diag (xoff, xlim, yoff, ylim, minimal, &split);
+
+  /* A cost of 1 would mean one subsequence is empty, which was already
+     handled above without calling `diag'; treat it as an internal
+     error rather than recording a single insert or delete.  */
+  if (cost == 1)
+    abort ();
+
+  /* Divide and conquer on the two halves, passing down whether each
+     half is to be analyzed minimally.  */
+  compareseq (xoff, split.xmid, yoff, split.ymid, split.lo_minimal);
+  compareseq (split.xmid, xlim, split.ymid, ylim, split.hi_minimal);
+}
+
+/* Discard lines from one file that have no matches in the other file.
+
+   A line which is discarded will not be considered by the actual
+   comparison algorithm; it will be as if that line were not in the file.
+   The file's `realindexes' table maps virtual line numbers
+   (which don't count the discarded lines) into real line numbers;
+   this is how the actual comparison algorithm produces results
+   that are comprehensible when the discarded lines are counted.
+
+   When we discard a line, we also mark it as a deletion or insertion
+   so that it will be printed in the output.  */
+
+static void
+discard_confusing_lines (filevec)
+     struct file_data filevec[];	/* The two files; elements 0 and 1 are used.  */
+{
+  unsigned int f, i;
+  char *discarded[2];
+  int *equiv_count[2];
+  int *p;
+
+  /* Allocate our results.  */
+  /* One allocation holds four arrays, laid out as undiscarded and
+     realindexes for file 0, then the same pair for file 1.  It is left
+     attached to FILEVEC; this function does not free it.  */
+  p = (int *) xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)
+		       * (2 * sizeof (int)));
+  for (f = 0; f < 2; f++)
+    {
+      filevec[f].undiscarded = p;  p += filevec[f].buffered_lines;
+      filevec[f].realindexes = p;  p += filevec[f].buffered_lines;
+    }
+
+  /* Set up equiv_count[F][I] as the number of lines in file F
+     that fall in equivalence class I.  */
+
+  /* Both tables live in one allocation and are both sized by
+     filevec[0].equiv_max.  */
+  p = (int *) xmalloc (filevec[0].equiv_max * (2 * sizeof (int)));
+  equiv_count[0] = p;
+  equiv_count[1] = p + filevec[0].equiv_max;
+  bzero (p, filevec[0].equiv_max * (2 * sizeof (int)));
+
+  for (i = 0; i < filevec[0].buffered_lines; ++i)
+    ++equiv_count[0][filevec[0].equivs[i]];
+  for (i = 0; i < filevec[1].buffered_lines; ++i)
+    ++equiv_count[1][filevec[1].equivs[i]];
+
+  /* Set up tables of which lines are going to be discarded.  */
+
+  /* discarded[F][I] is 0 to keep line I of file F, 1 to discard it,
+     2 if it is only provisionally discardable.  Again one allocation
+     serves both files.  */
+  discarded[0] = xmalloc (sizeof (char)
+			  * (filevec[0].buffered_lines
+			     + filevec[1].buffered_lines));
+  discarded[1] = discarded[0] + filevec[0].buffered_lines;
+  bzero (discarded[0], sizeof (char) * (filevec[0].buffered_lines
+					+ filevec[1].buffered_lines));
+
+  /* Mark to be discarded each line that matches no line of the other file.
+     If a line matches many lines, mark it as provisionally discardable.  */
+
+  for (f = 0; f < 2; f++)
+    {
+      unsigned int end = filevec[f].buffered_lines;
+      char *discards = discarded[f];
+      int *counts = equiv_count[1 - f];	/* Counts for the OTHER file.  */
+      int *equivs = filevec[f].equivs;
+      unsigned int many = 5;
+      unsigned int tem = end / 64;
+
+      /* Multiply MANY by approximate square root of number of lines.
+	 That is the threshold for provisionally discardable lines.  */
+      while ((tem = tem >> 2) > 0)
+	many *= 2;
+
+      for (i = 0; i < end; i++)
+	{
+	  int nmatch;
+	  /* Lines in equivalence class 0 are never marked.
+	     NOTE(review): what class 0 stands for is decided where
+	     `equivs' is filled in, outside this file -- confirm.  */
+	  if (equivs[i] == 0)
+	    continue;
+	  nmatch = counts[equivs[i]];
+	  if (nmatch == 0)
+	    discards[i] = 1;
+	  else if (nmatch > many)
+	    discards[i] = 2;
+	}
+    }
+
+  /* Don't really discard the provisional lines except when they occur
+     in a run of discardables, with nonprovisionals at the beginning
+     and end.  */
+
+  for (f = 0; f < 2; f++)
+    {
+      unsigned int end = filevec[f].buffered_lines;
+      register char *discards = discarded[f];
+
+      for (i = 0; i < end; i++)
+	{
+	  /* Cancel provisional discards not in middle of run of discards.  */
+	  if (discards[i] == 2)
+	    discards[i] = 0;
+	  else if (discards[i] != 0)
+	    {
+	      /* We have found a nonprovisional discard.  */
+	      register int j;
+	      unsigned int length;
+	      unsigned int provisional = 0;
+
+	      /* Find end of this run of discardable lines.
+		 Count how many are provisionally discardable.  */
+	      for (j = i; j < end; j++)
+		{
+		  if (discards[j] == 0)
+		    break;
+		  if (discards[j] == 2)
+		    ++provisional;
+		}
+
+	      /* Cancel provisional discards at end, and shrink the run.  */
+	      while (j > i && discards[j - 1] == 2)
+		discards[--j] = 0, --provisional;
+
+	      /* Now we have the length of a run of discardable lines
+		 whose first and last are not provisional.  */
+	      length = j - i;
+
+	      /* If more than 1/4 of the lines in the run are provisional,
+		 cancel discarding of all provisional lines in the run.  */
+	      /* Note that I is not advanced in this branch, so the outer
+		 loop walks over the rest of the run one line at a time.  */
+	      if (provisional * 4 > length)
+		{
+		  while (j > i)
+		    if (discards[--j] == 2)
+		      discards[j] = 0;
+		}
+	      else
+		{
+		  register unsigned int consec;
+		  unsigned int minimum = 1;
+		  unsigned int tem = length / 4;
+
+		  /* MINIMUM is approximate square root of LENGTH/4.
+		     A subrun of two or more provisionals can stand
+		     when LENGTH is at least 16.
+		     A subrun of 4 or more can stand when LENGTH >= 64.  */
+		  while ((tem = tem >> 2) > 0)
+		    minimum *= 2;
+		  minimum++;
+
+		  /* Cancel any subrun of MINIMUM or more provisionals
+		     within the larger run.  */
+		  /* From here on J is an offset from I, not an absolute
+		     line index.  CONSEC is deliberately left alone when
+		     backing up, so the rescanned lines all satisfy
+		     MINIMUM < CONSEC and get cancelled.  */
+		  for (j = 0, consec = 0; j < length; j++)
+		    if (discards[i + j] != 2)
+		      consec = 0;
+		    else if (minimum == ++consec)
+		      /* Back up to start of subrun, to cancel it all.  */
+		      j -= consec;
+		    else if (minimum < consec)
+		      discards[i + j] = 0;
+
+		  /* Scan from beginning of run
+		     until we find 3 or more nonprovisionals in a row
+		     or until the first nonprovisional at least 8 lines in.
+		     Until that point, cancel any provisionals.  */
+		  for (j = 0, consec = 0; j < length; j++)
+		    {
+		      if (j >= 8 && discards[i + j] == 1)
+			break;
+		      if (discards[i + j] == 2)
+			consec = 0, discards[i + j] = 0;
+		      else if (discards[i + j] == 0)
+			consec = 0;
+		      else
+			consec++;
+		      if (consec == 3)
+			break;
+		    }
+
+		  /* I advances to the last line of the run.  */
+		  i += length - 1;
+
+		  /* Same thing, from end.  */
+		  for (j = 0, consec = 0; j < length; j++)
+		    {
+		      if (j >= 8 && discards[i - j] == 1)
+			break;
+		      if (discards[i - j] == 2)
+			consec = 0, discards[i - j] = 0;
+		      else if (discards[i - j] == 0)
+			consec = 0;
+		      else
+			consec++;
+		      if (consec == 3)
+			break;
+		    }
+		}
+	    }
+	}
+    }
+
+  /* Actually discard the lines. */
+  /* Kept lines are packed into `undiscarded' (their equivalence class)
+     and `realindexes' (their original line number); every other line is
+     flagged as changed.  With NO_DISCARDS set, every line is kept.  */
+  for (f = 0; f < 2; f++)
+    {
+      char *discards = discarded[f];
+      unsigned int end = filevec[f].buffered_lines;
+      unsigned int j = 0;
+      for (i = 0; i < end; ++i)
+	if (no_discards || discards[i] == 0)
+	  {
+	    filevec[f].undiscarded[j] = filevec[f].equivs[i];
+	    filevec[f].realindexes[j++] = i;
+	  }
+	else
+	  filevec[f].changed_flag[i] = 1;
+      filevec[f].nondiscarded_lines = j;
+    }
+
+  /* discarded[1] and equiv_count[1] point into these same two
+     allocations, so these frees release all the temporary tables.  */
+  free (discarded[0]);
+  free (equiv_count[0]);
+}
+
+/* Adjust inserts/deletes of identical lines to join changes
+   as much as possible.
+
+   We do something when a run of changed lines includes a
+   line at one end and has an excluded, identical line at the other.
+   We are free to choose which identical line is included.
+   `compareseq' usually chooses the one at the beginning,
+   but usually it is cleaner to consider the following identical line
+   to be the "change".  */
+
+/* Nonzero makes shift_boundaries return without changing anything.
+   It has external linkage and is never assigned in the code visible
+   here; presumably another file sets it -- confirm.  */
+int inhibit;
+
+static void
+shift_boundaries (filevec)
+     struct file_data filevec[];	/* The two files; their changed_flag
+					   arrays are updated in place.  */
+{
+  int f;
+
+  if (inhibit)
+    return;
+
+  /* Process each file in turn; the other file's flags are only read.  */
+  for (f = 0; f < 2; f++)
+    {
+      char *changed = filevec[f].changed_flag;
+      char const *other_changed = filevec[1-f].changed_flag;
+      int const *equivs = filevec[f].equivs;
+      int i = 0;	/* Position in this file.  */
+      int j = 0;	/* Corresponding position in the other file.  */
+      int i_end = filevec[f].buffered_lines;
+
+      while (1)
+	{
+	  int runlength, start, corresponding;
+
+	  /* Scan forwards to find beginning of another run of changes.
+	     Also keep track of the corresponding point in the other file.  */
+
+	  while (i < i_end && changed[i] == 0)
+	    {
+	      while (other_changed[j++])
+		continue;
+	      i++;
+	    }
+
+	  if (i == i_end)
+	    break;
+
+	  start = i;
+
+	  /* Find the end of this run of changes.  */
+
+	  /* These scans have no bound check; they rely on a zero flag
+	     just past the last line (the note in build_reverse_script
+	     says that element exists and contains 0).  */
+	  while (changed[++i])
+	    continue;
+	  while (other_changed[j])
+	    j++;
+
+	  /* Repeat the backward and forward shifts until a full pass
+	     leaves the run length unchanged.  */
+	  do
+	    {
+	      /* Record the length of this run of changes, so that
+		 we can later determine whether the run has grown.  */
+	      runlength = i - start;
+
+	      /* Move the changed region back, so long as the
+		 previous unchanged line matches the last changed one.
+		 This merges with previous changed regions.  */
+
+	      while (start && equivs[start - 1] == equivs[i - 1])
+		{
+		  changed[--start] = 1;
+		  changed[--i] = 0;
+		  /* NOTE(review): this reads changed[-1] once START
+		     reaches 0; presumably the flag array has a zero
+		     guard element before index 0 -- confirm where it
+		     is allocated.  */
+		  while (changed[start - 1])
+		    start--;
+		  while (other_changed[--j])
+		    continue;
+		}
+
+	      /* Set CORRESPONDING to the end of the changed run, at the last
+		 point where it corresponds to a changed run in the other file.
+		 CORRESPONDING == I_END means no such point has been found.  */
+	      corresponding = other_changed[j - 1] ? i : i_end;
+
+	      /* Move the changed region forward, so long as the
+		 first changed line matches the following unchanged one.
+		 This merges with following changed regions.
+		 Do this second, so that if there are no merges,
+		 the changed region is moved forward as far as possible.  */
+
+	      while (i != i_end && equivs[start] == equivs[i])
+		{
+		  changed[start++] = 0;
+		  changed[i++] = 1;
+		  while (changed[i])
+		    i++;
+		  while (other_changed[++j])
+		    corresponding = i;
+		}
+	    }
+	  while (runlength != i - start);
+
+	  /* If possible, move the fully-merged run of changes
+	     back to a corresponding run in the other file.  */
+
+	  /* When no corresponding point was found, CORRESPONDING is
+	     I_END, which is never less than I, so this loop is skipped.  */
+	  while (corresponding < i)
+	    {
+	      changed[--start] = 1;
+	      changed[--i] = 0;
+	      while (other_changed[--j])
+		continue;
+	    }
+	}
+    }
+}
+
+/* Allocate a new `struct change' and push it onto the front of the edit
+   script OLD, returning the new head of the script.
+   LINE0 and LINE1 are the first affected lines in the two files (origin 0).
+   DELETED is the number of lines deleted here from file 0.
+   INSERTED is the number of lines inserted here in file 1.
+
+   If DELETED is 0 then LINE0 is the number of the line before
+   which the insertion was done; vice versa for INSERTED and LINE1.  */
+
+static struct change *
+add_change (line0, line1, deleted, inserted, old)
+     int line0, line1, deleted, inserted;
+     struct change *old;
+{
+  struct change *head;
+
+  head = (struct change *) xmalloc (sizeof *head);
+
+  /* Chain the rest of the script behind the new entry.  */
+  head->link = old;
+
+  head->line0 = line0;
+  head->deleted = deleted;
+  head->line1 = line1;
+  head->inserted = inserted;
+
+  return head;
+}
+
+/* Walk the changed flags of both files from the first line to the last
+   and build an edit script.  Each run of changes is consed onto the
+   front of the script, so the result is in reverse order.  */
+
+static struct change *
+build_reverse_script (filevec)
+     struct file_data const filevec[];
+{
+  struct change *script = 0;
+  char *changed0 = filevec[0].changed_flag;
+  char *changed1 = filevec[1].changed_flag;
+  int len0 = filevec[0].buffered_lines;
+  int len1 = filevec[1].buffered_lines;
+  int i0, i1;
+
+  /* Note that changed0[len0] and changed1[len1] do exist, and contain 0,
+     so the inner scans below always terminate.  */
+
+  for (i0 = 0, i1 = 0; i0 < len0 || i1 < len1; i0++, i1++)
+    {
+      int begin0 = i0, begin1 = i1;
+
+      /* These lines match each other: just step over them.  */
+      if (!changed0[i0] && !changed1[i1])
+        continue;
+
+      /* Find # lines changed here in each file.  */
+      while (changed0[i0])
+        i0++;
+      while (changed1[i1])
+        i1++;
+
+      /* Record this change; the loop increment then steps over the
+         matching pair of lines that ended the run.  */
+      script = add_change (begin0, begin1, i0 - begin0, i1 - begin1, script);
+    }
+
+  return script;
+}
+
+/* Walk the changed flags of both files from the last line back to the
+   first and build an edit script.  Each run of changes is consed onto
+   the front of the script, so the result is in forward order.  */
+
+static struct change *
+build_script (filevec)
+     struct file_data const filevec[];
+{
+  struct change *script = 0;
+  char *changed0 = filevec[0].changed_flag;
+  char *changed1 = filevec[1].changed_flag;
+  int i0, i1;
+
+  /* Note that changedN[-1] does exist, and contains 0,
+     so the inner scans below always terminate.  */
+
+  for (i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
+       i0 >= 0 || i1 >= 0;
+       i0--, i1--)
+    {
+      int end0 = i0, end1 = i1;
+
+      /* The lines just before this point match each other.  */
+      if (!changed0[i0 - 1] && !changed1[i1 - 1])
+        continue;
+
+      /* Find # lines changed here in each file.  */
+      while (changed0[i0 - 1])
+        i0--;
+      while (changed1[i1 - 1])
+        i1--;
+
+      /* Record this change; the loop decrement then steps over the
+         matching pair of lines that precedes the run.  */
+      script = add_change (i0, i1, end0 - i0, end1 - i1, script);
+    }
+
+  return script;
+}
+
+/* If CHANGES is nonzero, print a one-line message naming the two files
+   of FILEVEC as different.  The wording says "Binary files" unless
+   no_details_flag is set.  Do nothing when CHANGES is zero.  */
+static void
+briefly_report (changes, filevec)
+     int changes;
+     struct file_data const filevec[];
+{
+  char const *format;
+
+  if (!changes)
+    return;
+
+  if (no_details_flag)
+    format = "Files %s and %s differ\n";
+  else
+    format = "Binary files %s and %s differ\n";
+
+  message (format, filevec[0].name, filevec[1].name);
+}
+
+/* Report the differences of two files.  DEPTH is the current directory
+   depth.
+
+   FILEVEC describes the two files.  The value returned is 0 if no
+   differences were found, 1 if the files differ, and 2 if a missing
+   final newline was reported for an output style that is not robust.
+   The buffers, line tables and flag vectors hanging off FILEVEC are
+   freed before returning.  */
+int
+diff_2_files (filevec, depth)
+     struct file_data filevec[];
+     int depth;
+{
+  int diags;
+  int i;
+  struct change *e, *p;
+  struct change *script;
+  int changes;
+
+
+  /* If we have detected that either file is binary,
+     compare the two files as binary.  This can happen
+     only when the first chunk is read.
+     Also, --brief without any --ignore-* options means
+     we can speed things up by treating the files as binary.  */
+
+  if (read_files (filevec, no_details_flag & ~ignore_some_changes))
+    {
+      /* Files with different lengths must be different.  */
+      if (filevec[0].stat.st_size != filevec[1].stat.st_size
+	  && (filevec[0].desc < 0 || S_ISREG (filevec[0].stat.st_mode))
+	  && (filevec[1].desc < 0 || S_ISREG (filevec[1].stat.st_mode)))
+	changes = 1;
+
+      /* Standard input equals itself.  */
+      else if (filevec[0].desc == filevec[1].desc)
+	changes = 0;
+
+      else
+	/* Scan both files, a buffer at a time, looking for a difference.  */
+	{
+	  /* Allocate same-sized buffers for both files.  */
+	  size_t buffer_size = buffer_lcm (STAT_BLOCKSIZE (filevec[0].stat),
+					   STAT_BLOCKSIZE (filevec[1].stat));
+	  for (i = 0; i < 2; i++)
+	    filevec[i].buffer = xrealloc (filevec[i].buffer, buffer_size);
+
+	  /* The first pass compares whatever is already buffered plus
+	     enough new data to fill the buffers; each later pass starts
+	     from empty buffers.  */
+	  for (;;  filevec[0].buffered_chars = filevec[1].buffered_chars = 0)
+	    {
+	      /* Read a buffer's worth from both files.  */
+	      for (i = 0; i < 2; i++)
+		if (0 <= filevec[i].desc)
+		  while (filevec[i].buffered_chars != buffer_size)
+		    {
+		      int r = read (filevec[i].desc,
+				    filevec[i].buffer
+				    + filevec[i].buffered_chars,
+				    buffer_size - filevec[i].buffered_chars);
+		      /* Zero means end of file: stop with a short buffer.  */
+		      if (r == 0)
+			break;
+		      if (r < 0)
+			pfatal_with_name (filevec[i].name);
+		      filevec[i].buffered_chars += r;
+		    }
+
+	      /* If the buffers differ, the files differ.  */
+	      if (filevec[0].buffered_chars != filevec[1].buffered_chars
+		  || (filevec[0].buffered_chars != 0
+		      && memcmp (filevec[0].buffer,
+				 filevec[1].buffer,
+				 filevec[0].buffered_chars) != 0))
+		{
+		  changes = 1;
+		  break;
+		}
+
+	      /* If we reach end of file, the files are the same.  */
+	      if (filevec[0].buffered_chars != buffer_size)
+		{
+		  changes = 0;
+		  break;
+		}
+	    }
+	}
+
+      briefly_report (changes, filevec);
+    }
+  else
+    {
+      /* Allocate vectors for the results of comparison:
+	 a flag for each line of each file, saying whether that line
+	 is an insertion or deletion.
+	 Allocate an extra element, always zero, at each end of each vector.  */
+
+      /* Both vectors live in one allocation: two guard bytes per file
+	 account for the 4 extra elements.  */
+      size_t s = filevec[0].buffered_lines + filevec[1].buffered_lines + 4;
+      filevec[0].changed_flag = xmalloc (s);
+      bzero (filevec[0].changed_flag, s);
+      /* Step past the leading guard so that index -1 is valid.  */
+      filevec[0].changed_flag++;
+      filevec[1].changed_flag = filevec[0].changed_flag
+				+ filevec[0].buffered_lines + 2;
+
+      /* Some lines are obviously insertions or deletions
+	 because they don't match anything.  Detect them now, and
+	 avoid even thinking about them in the main comparison algorithm.  */
+
+      discard_confusing_lines (filevec);
+
+      /* Now do the main comparison algorithm, considering just the
+	 undiscarded lines.  */
+
+      xvec = filevec[0].undiscarded;
+      yvec = filevec[1].undiscarded;
+      diags = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
+      /* One allocation holds both diagonal vectors, DIAGS ints each.
+	 Both pointers are then advanced by the same offset
+	 (NOTE(review): presumably so that negative diagonal numbers can
+	 be used as indices -- confirm against compareseq).  */
+      fdiag = (int *) xmalloc (diags * (2 * sizeof (int)));
+      bdiag = fdiag + diags;
+      fdiag += filevec[1].nondiscarded_lines + 1;
+      bdiag += filevec[1].nondiscarded_lines + 1;
+
+      /* Set TOO_EXPENSIVE to be approximate square root of input size,
+	 bounded below by 256.  */
+      too_expensive = 1;
+      for (i = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines;
+	   i != 0; i >>= 2)
+	too_expensive <<= 1;
+      too_expensive = max (256, too_expensive);
+
+      /* Copy the descriptions into the global FILES, which the
+	 output code below reads.  */
+      files[0] = filevec[0];
+      files[1] = filevec[1];
+
+      compareseq (0, filevec[0].nondiscarded_lines,
+		  0, filevec[1].nondiscarded_lines, no_discards);
+
+      /* Undo the offset applied above to recover the allocated pointer;
+	 this releases the storage behind bdiag as well.  */
+      free (fdiag - (filevec[1].nondiscarded_lines + 1));
+
+      /* Modify the results slightly to make them prettier
+	 in cases where that can validly be done.  */
+
+      shift_boundaries (filevec);
+
+      /* Get the results of comparison in the form of a chain
+	 of `struct change's -- an edit script.  */
+
+      if (output_style == OUTPUT_ED)
+	script = build_reverse_script (filevec);
+      else
+	script = build_script (filevec);
+
+      /* Set CHANGES if we had any diffs.
+	 If some changes are ignored, we must scan the script to decide.  */
+      if (ignore_blank_lines_flag || ignore_regexp_list)
+	{
+	  struct change *next = script;
+	  changes = 0;
+
+	  /* Stop at the first hunk that is a real difference.  */
+	  while (next && changes == 0)
+	    {
+	      struct change *this, *end;
+	      int first0, last0, first1, last1, deletes, inserts;
+
+	      /* Find a set of changes that belong together.  */
+	      this = next;
+	      end = find_change (next);
+
+	      /* Disconnect them from the rest of the changes, making them
+		 a hunk, and remember the rest for next iteration.  */
+	      next = end->link;
+	      end->link = 0;
+
+	      /* Determine whether this hunk is really a difference.  */
+	      analyze_hunk (this, &first0, &last0, &first1, &last1,
+			    &deletes, &inserts);
+
+	      /* Reconnect the script so it will all be freed properly.  */
+	      end->link = next;
+
+	      if (deletes || inserts)
+		changes = 1;
+	    }
+	}
+      else
+	changes = (script != 0);
+
+      if (no_details_flag)
+	briefly_report (changes, filevec);
+      else
+	{
+	  if (changes || ! no_diff_means_no_output)
+	    {
+	      /* Record info for starting up output,
+		 to be used if and when we have some output to print.  */
+	      setup_output (files[0].name, files[1].name, depth);
+
+	      /* Hand the script to the printer for the selected style.  */
+	      switch (output_style)
+		{
+		case OUTPUT_CONTEXT:
+		  print_context_script (script, 0);
+		  break;
+
+		case OUTPUT_UNIFIED:
+		  print_context_script (script, 1);
+		  break;
+
+		case OUTPUT_ED:
+		  print_ed_script (script);
+		  break;
+
+		case OUTPUT_FORWARD_ED:
+		  pr_forward_ed_script (script);
+		  break;
+
+		case OUTPUT_RCS:
+		  print_rcs_script (script);
+		  break;
+
+		case OUTPUT_NORMAL:
+		  print_normal_script (script);
+		  break;
+
+		case OUTPUT_IFDEF:
+		  print_ifdef_script (script);
+		  break;
+
+		case OUTPUT_SDIFF:
+		  print_sdiff_script (script);
+		}
+
+	      finish_output ();
+	    }
+	}
+
+      /* NOTE(review): only file 0's vector is freed here; presumably
+	 file 1's `undiscarded' shares that allocation -- confirm in
+	 discard_confusing_lines.  */
+      free (filevec[0].undiscarded);
+
+      /* Back up over the leading guard to the pointer xmalloc returned;
+	 filevec[1].changed_flag points into the same block.  */
+      free (filevec[0].changed_flag - 1);
+
+      for (i = 1; i >= 0; --i)
+	free (filevec[i].equivs);
+
+      for (i = 0; i < 2; ++i)
+	free (filevec[i].linbuf + filevec[i].linbuf_base);
+
+      /* Free the edit script, saving each link before its node goes.  */
+      for (e = script; e; e = p)
+	{
+	  p = e->link;
+	  free (e);
+	}
+
+      if (! ROBUST_OUTPUT_STYLE (output_style))
+	for (i = 0; i < 2; ++i)
+	  if (filevec[i].missing_newline)
+	    {
+	      error ("No newline at end of file %s", filevec[i].name, "");
+	      changes = 2;
+	    }
+    }
+
+  /* The two files may share a single buffer; free it only once.  */
+  if (filevec[0].buffer != filevec[1].buffer)
+    free (filevec[0].buffer);
+  free (filevec[1].buffer);
+
+  return changes;
+}

+ 40 - 0
sys/src/ape/cmd/diff/cmpbuf.c

@@ -0,0 +1,40 @@
+/* Buffer primitives for comparison operations.
+   Copyright (C) 1993 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#include "system.h"
+#include "cmpbuf.h"
+
+/* Return the least common multiple of two buffer sizes A and B.
+   A size of zero is treated as "unknown": the other size is returned,
+   or 8 KiB when both are zero.  */
+
+size_t
+buffer_lcm (a, b)
+     size_t a, b;
+{
+  size_t big, small;
+
+  /* Yield reasonable values if buffer sizes are zero.  */
+  if (a == 0)
+    {
+      if (b == 0)
+        return 8 * 1024;
+      return b;
+    }
+  if (b == 0)
+    return a;
+
+  /* Euclid's algorithm: on exit SMALL is gcd (a, b).  */
+  big = a;
+  small = b;
+  while (big % small != 0)
+    {
+      size_t rest = big % small;
+      big = small;
+      small = rest;
+    }
+
+  /* Divide before multiplying to keep the intermediate value small.  */
+  return a / small * b;
+}

+ 20 - 0
sys/src/ape/cmd/diff/cmpbuf.h

@@ -0,0 +1,20 @@
+/* Buffer primitives for comparison operations.
+   Copyright (C) 1993 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU DIFF; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/* Return the least common multiple of two buffer sizes;
+   the definition is in cmpbuf.c.  */
+size_t buffer_lcm PARAMS((size_t, size_t));

+ 118 - 0
sys/src/ape/cmd/diff/config.h

@@ -0,0 +1,118 @@
+/* config.h.  Generated automatically by configure.  */
+/* config.hin.  Generated automatically from configure.in by autoheader.  */
+
+/* Define if using alloca.c.  */
+/* #undef C_ALLOCA */
+
+/* Define if the closedir function returns void instead of int.  */
+/* #undef CLOSEDIR_VOID */
+
+/* Define to empty if the keyword does not work.  */
+/* #undef const */
+
+/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems.
+   This function is required for alloca.c support on those systems.  */
+/* #undef CRAY_STACKSEG_END */
+
+/* Define if you have <alloca.h> and it should be used (not on Ultrix).  */
+/* #undef HAVE_ALLOCA_H */
+
+/* Define if you don't have vprintf but do have _doprnt.  */
+/* #undef HAVE_DOPRNT */
+
+/* Define if your struct stat has st_blksize.  */
+/* #define HAVE_ST_BLKSIZE 1 */
+
+/* Define if you have <vfork.h>.  */
+/* #undef HAVE_VFORK_H */
+
+/* Define if you have the vprintf function.  */
+#define HAVE_VPRINTF 1
+
+/* Define if on MINIX.  */
+/* #undef _MINIX */
+
+/* Define to `int' if <sys/types.h> doesn't define.  */
+/* #undef pid_t */
+
+/* Define if the system does not provide POSIX.1 features except
+   with this defined.  */
+/* #undef _POSIX_1_SOURCE */
+
+/* Define if you need to in order for stat and other things to work.  */
+/* #undef _POSIX_SOURCE */
+
+/* Define as the return type of signal handlers (int or void).  */
+#define RETSIGTYPE void
+
+/* If using the C implementation of alloca, define if you know the
+   direction of stack growth for your system; otherwise it will be
+   automatically deduced at run-time.
+	STACK_DIRECTION > 0 => grows toward higher addresses
+	STACK_DIRECTION < 0 => grows toward lower addresses
+	STACK_DIRECTION = 0 => direction of growth unknown
+ */
+/* #undef STACK_DIRECTION */
+
+/* Define if the `S_IS*' macros in <sys/stat.h> do not work properly.  */
+/* #undef STAT_MACROS_BROKEN */
+
+/* Define if you have the ANSI C header files.  */
+#define STDC_HEADERS 1
+
+/* Define if <sys/wait.h> is compatible with Posix applications.  */
+#define HAVE_SYS_WAIT_H 1
+
+/* Define vfork as fork if vfork does not work.  */
+/* #undef vfork */
+
+/* Define if you have the dup2 function.  */
+#define HAVE_DUP2 1
+
+/* Define if you have the memchr function.  */
+#define HAVE_MEMCHR 1
+
+/* Define if you have the sigaction function.  */
+#define HAVE_SIGACTION 1
+
+/* Define if you have the strchr function.  */
+#define HAVE_STRCHR 1
+
+/* Define if you have the strerror function.  */
+#define HAVE_STRERROR 1
+
+/* Define if you have the tmpnam function.  */
+#define HAVE_TMPNAM 1
+
+/* Define if you have the <dirent.h> header file.  */
+#define HAVE_DIRENT_H 1
+
+/* Define if you have the <fcntl.h> header file.  */
+#define HAVE_FCNTL_H 1
+
+/* Define if you have the <limits.h> header file.  */
+#define HAVE_LIMITS_H 1
+
+/* Define if you have the <ndir.h> header file.  */
+/* #undef HAVE_NDIR_H */
+
+/* Define if you have the <stdlib.h> header file.  */
+#define HAVE_STDLIB_H 1
+
+/* Define if you have the <string.h> header file.  */
+#define HAVE_STRING_H 1
+
+/* Define if you have the <sys/dir.h> header file.  */
+/* #undef HAVE_SYS_DIR_H */
+
+/* Define if you have the <sys/file.h> header file.  */
+#define HAVE_SYS_FILE_H 1
+
+/* Define if you have the <sys/ndir.h> header file.  */
+/* #undef HAVE_SYS_NDIR_H */
+
+/* Define if you have the <time.h> header file.  */
+#define HAVE_TIME_H 1
+
+/* Define if you have the <unistd.h> header file.  */
+#define HAVE_UNISTD_H 1

+ 468 - 0
sys/src/ape/cmd/diff/context.c

@@ -0,0 +1,468 @@
+/* Context-format output routines for GNU DIFF.
+   Copyright (C) 1988,1989,1991,1992,1993,1994 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU DIFF; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#include "diff.h"
+
+/* Forward declarations of the helpers private to this file.  */
+static struct change *find_hunk PARAMS((struct change *));
+static void find_function PARAMS((struct file_data const *, int, char const **, size_t *));
+static void mark_ignorable PARAMS((struct change *));
+static void pr_context_hunk PARAMS((struct change *));
+static void pr_unidiff_hunk PARAMS((struct change *));
+static void print_context_label PARAMS ((char const *, struct file_data *, char const *));
+static void print_context_number_range PARAMS((struct file_data const *, int, int));
+static void print_unidiff_number_range PARAMS((struct file_data const *, int, int));
+
+/* Last place find_function started searching from.
+   print_context_script resets it to the first line of the file
+   (minus files[0].prefix_lines) before each script is printed.  */
+static int find_function_last_search;
+
+/* The value find_function returned when it started searching there:
+   the line number of the most recent match.  print_context_script
+   resets it to one less than the first line, meaning "no match yet".  */
+static int find_function_last_match;
+
+/* Print one header line of a context diff on OUTFILE.  MARK is the
+   leading marker string.  If LABEL is nonnull, print it after the
+   marker; otherwise print the name of INF followed by a tab and its
+   modification time.  */
+
+static void
+print_context_label (mark, inf, label)
+     char const *mark;
+     struct file_data *inf;
+     char const *label;
+{
+  char const *stamp;
+
+  if (label)
+    {
+      fprintf (outfile, "%s %s\n", mark, label);
+      return;
+    }
+
+  /* The time string supplies the line's newline, so the fallback
+     used when ctime fails carries one as well.  */
+  stamp = ctime (&inf->stat.st_mtime);
+  if (stamp == 0)
+    stamp = "?\n";
+
+  /* See Posix.2 section 4.17.6.1.4 for this format.  */
+  fprintf (outfile, "%s %s\t%s", mark, inf->name, stamp);
+}
+
+/* Print the two header lines of a context diff for the files INF[0]
+   and INF[1].  The markers are "---" and "+++" when UNIDIFF_FLAG is
+   nonzero, "***" and "---" otherwise.  file_label[N], when set,
+   replaces the name and date of file N.  */
+
+void
+print_context_header (inf, unidiff_flag)
+     struct file_data inf[];
+     int unidiff_flag;
+{
+  char const *mark0 = unidiff_flag ? "---" : "***";
+  char const *mark1 = unidiff_flag ? "+++" : "---";
+
+  print_context_label (mark0, &inf[0], file_label[0]);
+  print_context_label (mark1, &inf[1], file_label[1]);
+}
+
+/* Print the edit script SCRIPT in context format, or in unified
+   format if UNIDIFF_FLAG is nonzero.  */
+
+void
+print_context_script (script, unidiff_flag)
+     struct change *script;
+     int unidiff_flag;
+{
+  if (!ignore_blank_lines_flag && !ignore_regexp_list)
+    {
+      /* Nothing can be ignored: clear the flag on every change.  */
+      struct change *c = script;
+      while (c)
+        {
+          c->ignore = 0;
+          c = c->link;
+        }
+    }
+  else
+    mark_ignorable (script);
+
+  /* Restart the function-header search at the top of the file,
+     with no match remembered.  */
+  find_function_last_search = - files[0].prefix_lines;
+  find_function_last_match = find_function_last_search - 1;
+
+  print_script (script, find_hunk,
+                unidiff_flag ? pr_unidiff_hunk : pr_context_hunk);
+}
+
+/* Print a line-number range for file FILE in context-diff form.
+   If the translated second number is greater than the first, print
+   both, separated by a comma; otherwise print only the second.
+
+   Args A and B are internal line numbers.
+   We print the translated (real) line numbers.  */
+
+static void
+print_context_number_range (file, a, b)
+     struct file_data const *file;
+     int a, b;
+{
+  int lo, hi;
+
+  translate_range (file, a, b, &lo, &hi);
+
+  /* Note: we can have B < A in the case of a range of no lines.
+     In this case, we should print the line number before the range,
+     which is B.  */
+  if (hi <= lo)
+    {
+      fprintf (outfile, "%d", hi);
+      return;
+    }
+
+  fprintf (outfile, "%d,%d", lo, hi);
+}
+
+/* Print one hunk of an edit script in context format.
+   HUNK is the first change of the hunk; the hunk ends at the change
+   whose `link' has been nulled out.
+
+   The file-0 lines are printed first, then the file-1 lines, each
+   line preceded by its flag character.  */
+
+static void
+pr_context_hunk (hunk)
+     struct change *hunk;
+{
+  int first0, last0, first1, last1, show_from, show_to, i;
+  struct change *cur;
+  char const *function = 0;
+  size_t function_length;
+  FILE *out;
+
+  /* Determine range of line numbers involved in each file.  */
+  analyze_hunk (hunk, &first0, &last0, &first1, &last1, &show_from, &show_to);
+
+  /* Nothing to show from either file: print nothing at all.  */
+  if (! (show_from || show_to))
+    return;
+
+  /* Widen both ranges by CONTEXT lines, clipped to the lines available.  */
+  i = - files[0].prefix_lines;
+  first0 = max (first0 - context, i);
+  first1 = max (first1 - context, i);
+  last0 = min (last0 + context, files[0].valid_lines - 1);
+  last1 = min (last1 + context, files[1].valid_lines - 1);
+
+  /* If desired, find the preceding function definition line in file 0.  */
+  if (function_regexp_list)
+    find_function (&files[0], first0, &function, &function_length);
+
+  begin_output ();
+  out = outfile;
+
+  /* Hunk separator, followed by the function line if one was found
+     (at most 40 characters, without its final character).  */
+  fprintf (out, "***************");
+  if (function)
+    {
+      fprintf (out, " ");
+      fwrite (function, 1, min (function_length - 1, 40), out);
+    }
+
+  fprintf (out, "\n*** ");
+  print_context_number_range (&files[0], first0, last0);
+  fprintf (out, " ****\n");
+
+  if (show_from)
+    {
+      cur = hunk;
+      i = first0;
+      while (i <= last0)
+        {
+          char const *mark = " ";
+
+          /* Skip past changes that apply (in file 0)
+             only to lines before line I.  */
+          while (cur && cur->line0 + cur->deleted <= i)
+            cur = cur->link;
+
+          /* If CUR covers this line it is "changed" when lines were
+             also inserted here in file 1, and "deleted" otherwise.  */
+          if (cur && cur->line0 <= i)
+            mark = cur->inserted > 0 ? "!" : "-";
+
+          print_1_line (mark, &files[0].linbuf[i]);
+          i++;
+        }
+    }
+
+  fprintf (out, "--- ");
+  print_context_number_range (&files[1], first1, last1);
+  fprintf (out, " ----\n");
+
+  if (show_to)
+    {
+      cur = hunk;
+      i = first1;
+      while (i <= last1)
+        {
+          char const *mark = " ";
+
+          /* Skip past changes that apply (in file 1)
+             only to lines before line I.  */
+          while (cur && cur->line1 + cur->inserted <= i)
+            cur = cur->link;
+
+          /* If CUR covers this line it is "changed" when lines were
+             also deleted here in file 0, and "inserted" otherwise.  */
+          if (cur && cur->line1 <= i)
+            mark = cur->deleted > 0 ? "!" : "+";
+
+          print_1_line (mark, &files[1].linbuf[i]);
+          i++;
+        }
+    }
+}
+
+/* Print a line-number range for file FILE in unified-diff form.
+   If the translated second number is greater than the first, print
+   the first number, a comma, and the count of lines in the range.
+   If the two are equal, print just that one number.
+   If the second is smaller (an empty range), print it followed by ",0".
+
+   Args A and B are internal line numbers.
+   We print the translated (real) line numbers.  */
+
+static void
+print_unidiff_number_range (file, a, b)
+     struct file_data const *file;
+     int a, b;
+{
+  int trans_a, trans_b;
+
+  translate_range (file, a, b, &trans_a, &trans_b);
+
+  /* Note: we can have B < A in the case of a range of no lines.
+     In this case, we should print the line number before the range,
+     which is B.  */
+  if (trans_b > trans_a)
+    fprintf (outfile, "%d,%d", trans_a, trans_b - trans_a + 1);
+  else if (trans_b == trans_a)
+    fprintf (outfile, "%d", trans_b);
+  else
+    fprintf (outfile, "%d,0", trans_b);
+}
+
+/* Print one hunk of an edit script in unidiff format.
+   HUNK is the first change of the hunk; the hunk ends at the change
+   whose `link' has been nulled out.
+
+   Context lines, deleted lines and inserted lines are interleaved in
+   file order, each preceded by its flag character.  */
+
+static void
+pr_unidiff_hunk (hunk)
+     struct change *hunk;
+{
+  int first0, last0, first1, last1, show_from, show_to, i, j, k;
+  struct change *cur;
+  char const *function = 0;
+  size_t function_length;
+  FILE *out;
+
+  /* Determine range of line numbers involved in each file.  */
+  analyze_hunk (hunk, &first0, &last0, &first1, &last1, &show_from, &show_to);
+
+  /* Nothing to show from either file: print nothing at all.  */
+  if (! (show_from || show_to))
+    return;
+
+  /* Widen both ranges by CONTEXT lines, clipped to the lines available.  */
+  i = - files[0].prefix_lines;
+  first0 = max (first0 - context, i);
+  first1 = max (first1 - context, i);
+  last0 = min (last0 + context, files[0].valid_lines - 1);
+  last1 = min (last1 + context, files[1].valid_lines - 1);
+
+  /* If desired, find the preceding function definition line in file 0.  */
+  if (function_regexp_list)
+    find_function (&files[0], first0, &function, &function_length);
+
+  begin_output ();
+  out = outfile;
+
+  fprintf (out, "@@ -");
+  print_unidiff_number_range (&files[0], first0, last0);
+  fprintf (out, " +");
+  print_unidiff_number_range (&files[1], first1, last1);
+  fprintf (out, " @@");
+
+  /* Append the function line if one was found
+     (at most 40 characters, without its final character).  */
+  if (function)
+    {
+      putc (' ', out);
+      fwrite (function, 1, min (function_length - 1, 40), out);
+    }
+  putc ('\n', out);
+
+  cur = hunk;
+  i = first0;
+  j = first1;
+
+  while (i <= last0 || j <= last1)
+    {
+      if (cur && i >= cur->line0)
+        {
+          /* We are at a change: first output the deleted part.  */
+          for (k = cur->deleted; k != 0; k--)
+            {
+              putc ('-', out);
+              if (tab_align_flag)
+                putc ('\t', out);
+              print_1_line (0, &files[0].linbuf[i]);
+              i++;
+            }
+
+          /* Then output the inserted part.  */
+          for (k = cur->inserted; k != 0; k--)
+            {
+              putc ('+', out);
+              if (tab_align_flag)
+                putc ('\t', out);
+              print_1_line (0, &files[1].linbuf[j]);
+              j++;
+            }
+
+          /* This change is done; move on to the next one.  */
+          cur = cur->link;
+        }
+      else
+        {
+          /* Not a difference: output the context line from file 0
+             and step over the matching line of file 1.  */
+          putc (tab_align_flag ? '\t' : ' ', out);
+          print_1_line (0, &files[0].linbuf[i]);
+          i++;
+          j++;
+        }
+    }
+}
+
+/* Scan the forward-ordered edit script START for the first gap of
+   unchanged lines wide enough to end a hunk, and return a pointer to
+   the `struct change' for the last change before that gap (or the
+   last change of the script if there is no such gap).  */
+
+static struct change *
+find_hunk (start)
+     struct change *start;
+{
+  struct change *prev;
+
+  for (;;)
+    {
+      /* Number of the first line in each file beyond this change.  */
+      int top0 = start->line0 + start->deleted;
+      int top1 = start->line1 + start->inserted;
+      int thresh;
+
+      prev = start;
+      start = start->link;
+
+      /* End of script: PREV is the last change of the final hunk.  */
+      if (start == 0)
+        break;
+
+      /* Threshold distance is 2*CONTEXT between two non-ignorable changes,
+         but only CONTEXT if one is ignorable.  */
+      if (prev->ignore || start->ignore)
+        thresh = context;
+      else
+        thresh = 2 * context + 1;
+
+      /* It is not supposed to matter which file we check in the end-test.
+         If it would matter, crash.  */
+      if (start->line0 - top0 != start->line1 - top1)
+        abort ();
+
+      /* Stop once at least THRESH lines elapse before the next change.  */
+      if (start->line0 >= top0 + thresh)
+        break;
+    }
+
+  return prev;
+}
+
+/* Set the `ignore' flag of every change in SCRIPT.
+   The flag becomes 1 when analyze_hunk, applied to that change alone,
+   reports neither deletes nor inserts; otherwise it becomes 0.  */
+
+static void
+mark_ignorable (script)
+     struct change *script;
+{
+  struct change *rest;
+
+  for (; script; script = rest)
+    {
+      int first0, last0, first1, last1, deletes, inserts;
+
+      /* Detach this change from its successors so that it forms
+         a hunk by itself.  */
+      rest = script->link;
+      script->link = 0;
+
+      /* Determine whether this change is ignorable.  */
+      analyze_hunk (script, &first0, &last0, &first1, &last1,
+                    &deletes, &inserts);
+
+      /* Reconnect the chain as before.  */
+      script->link = rest;
+
+      script->ignore = !(deletes || inserts);
+    }
+}
+
+/* Find the last function-header line in FILE prior to line number LINENUM.
+   This is a line containing a match for one of the regexps in
+   `function_regexp_list'.
+   Store the address of the line text into LINEP and the length of the
+   line into LENP.
+   Do not store anything if no function-header is found.
+
+   The search goes back only as far as the line where the previous call
+   started; beyond that the previously found match, if any, is reused.  */
+
+static void
+find_function (file, linenum, linep, lenp)
+     struct file_data const *file;
+     int linenum;
+     char const **linep;
+     size_t *lenp;
+{
+  int i;
+  int last = find_function_last_search;
+
+  find_function_last_search = linenum;
+
+  for (i = linenum - 1; i >= last; i--)
+    {
+      /* See if this line is what we want.  */
+      struct regexp_list *r = function_regexp_list;
+      char const *line = file->linbuf[i];
+      size_t len = file->linbuf[i + 1] - line;
+
+      while (r)
+        {
+          if (re_search (&r->buf, line, len, 0, len, 0) >= 0)
+            {
+              *linep = line;
+              *lenp = len;
+              find_function_last_match = i;
+              return;
+            }
+          r = r->next;
+        }
+    }
+
+  /* We searched back to where the previous search started.
+     If nothing has been matched so far, store nothing.  */
+  if (find_function_last_match < - file->prefix_lines)
+    return;
+
+  /* Otherwise report the line found last time.  */
+  i = find_function_last_match;
+  *linep = file->linbuf[i];
+  *lenp = file->linbuf[i + 1] - *linep;
+}

+ 71 - 0
sys/src/ape/cmd/diff/diagmeet.note

@@ -0,0 +1,71 @@
+Here is a comparison matrix which shows a case in which
+it is possible for the forward and backward scan in `diag'
+to meet along a nonzero length of diagonal simultaneously
+(so that bdiag[d] and fdiag[d] are not equal)
+even though there is no snake on that diagonal at the meeting point.
+
+
+     85   1   1   1  159  1   1   17
+        1   2   3   4
+60
+    1   2
+1
+    2  	    2   3   4
+71
+    3       3  	4   5
+85
+    4  	3   4	5
+17
+    5  	4   5
+1
+    6       4  	5   6
+183
+    7       5   6   7
+10
+    8  	    6  	7
+1
+    9           6   7  	8
+12
+                7   8   9  10
+13
+       10       8   9  10
+14
+           10   9  10
+17
+       10      10
+1
+   10   9  10
+1
+	8      10      10      10
+183
+    8   7	9       9      	9
+10
+    7   6	8   9   8      	8
+1
+    6   5    		7       7
+1
+            5          	6      	6
+1
+	       	5      	5      	5
+50
+	    5   4      	4      	4
+1
+	            4   3	3
+85
+	    5   4   3   2       2
+1
+	                    2   1
+17
+	    5   4   3   2   1       1
+1
+		                1   0
+     85   1   1   1  159  1   1  17
+
+
+
+
+
+
+
+
+

+ 1124 - 0
sys/src/ape/cmd/diff/diff.c

@@ -0,0 +1,1124 @@
+/* GNU DIFF main routine.
+   Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU DIFF; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/* GNU DIFF was written by Mike Haertel, David Hayes,
+   Richard Stallman, Len Tower, and Paul Eggert.  */
+
+/* $FreeBSD: src/contrib/diff/diff.c,v 1.3 1999/11/26 02:51:44 obrien Exp $ */
+
+#define GDIFF_MAIN
+#include "diff.h"
+#include <signal.h>
+#include "getopt.h"
+#ifdef __FreeBSD__
+#include <locale.h>
+#include <fnmatch.h>
+#else
+#include "fnmatch.h"
+#endif
+#include "prepend_args.h"
+
+#ifndef DEFAULT_WIDTH
+#define DEFAULT_WIDTH 130
+#endif
+
+#ifndef GUTTER_WIDTH_MINIMUM
+#define GUTTER_WIDTH_MINIMUM 3
+#endif
+
+static char const *filetype PARAMS((struct stat const *));
+static char *option_list PARAMS((char **, int));
+static int add_exclude_file PARAMS((char const *));
+static int ck_atoi PARAMS((char const *, int *));
+static int compare_files PARAMS((char const *, char const *, char const *, char const *, int));
+static int specify_format PARAMS((char **, char *));
+static void add_exclude PARAMS((char const *));
+static void add_regexp PARAMS((struct regexp_list **, char const *));
+static void specify_style PARAMS((enum output_style));
+static void try_help PARAMS((char const *));
+static void check_stdout PARAMS((void));
+static void usage PARAMS((void));
+
+/* Nonzero for -r: if comparing two directories,
+   compare their common subdirectories recursively.  */
+
+static int recursive;
+
+/* For debugging: don't do discard_confusing_lines.  */
+
+int no_discards;
+
+#if HAVE_SETMODE
+/* I/O mode: nonzero only if using binary input/output.  */
+static int binary_I_O;
+#endif
+
+/* Build one string out of the COUNT argument strings in OPTIONVEC.
+   Every argument is preceded by a single space, so a nonempty result
+   starts with a space and never ends with one; when COUNT is zero the
+   result is the empty string.
+
+   The result is obtained from xmalloc.  */
+
+static char *
+option_list (optionvec, count)
+     char **optionvec;  /* Not named `vector': that collides on Alliant.  */
+     int count;
+{
+  size_t total = 1;		/* Room for the terminating null byte.  */
+  char *joined;
+  char *tail;
+  int k;
+
+  /* Each argument costs its own length plus one separating space.  */
+  for (k = 0; k < count; k++)
+    total += 1 + strlen (optionvec[k]);
+
+  joined = xmalloc (total);
+  tail = joined;
+
+  /* Append " ARG" for each argument, keeping TAIL at the end.  */
+  for (k = 0; k < count; k++)
+    {
+      *tail++ = ' ';
+      strcpy (tail, optionvec[k]);
+      tail += strlen (tail);
+    }
+  *tail = 0;
+
+  return joined;
+}
+
+/* Convert STR to a nonnegative integer, storing the result in *OUT.
+   STR is valid only if every character in it is a decimal digit, so a
+   sign, white space or any other character makes it invalid.
+   Return 0 on success.  Return -1 if STR is invalid, in which case
+   *OUT is left untouched.
+   An empty STR contains no invalid character; it is accepted and
+   stores 0.  */
+static int
+ck_atoi (str, out)
+     char const *str;
+     int *out;
+{
+  char const *p;
+  for (p = str; *p; p++)
+    if (*p < '0' || *p > '9')
+      return -1;
+
+  /* Convert the string that was just validated.  This used to convert
+     the global `optarg' instead, which gave the right answer only
+     because every caller happened to pass optarg as STR.  */
+  *out = atoi (str);
+  return 0;
+}
+
+/* Table of excluded file name patterns: EXCLUDE holds EXCLUDE_COUNT
+   patterns in storage sized for EXCLUDE_ALLOC of them.  */
+
+static char const **exclude;
+static int exclude_alloc, exclude_count;
+
+/* Return 1 if the file name F matches any recorded exclusion pattern
+   (tested with fnmatch and no flags), 0 otherwise.  */
+
+int
+excluded_filename (f)
+     char const *f;
+{
+  int idx = 0;
+
+  while (idx < exclude_count)
+    {
+      if (fnmatch (exclude[idx], f, 0) == 0)
+	return 1;
+      idx++;
+    }
+
+  return 0;
+}
+
+/* Append PATTERN to the table of excluded file name patterns.
+   The table starts with room for 64 patterns and doubles whenever it
+   is full.  Only the pointer is stored; PATTERN is not copied.  */
+
+static void
+add_exclude (pattern)
+     char const *pattern;
+{
+  if (exclude_count >= exclude_alloc)
+    {
+      if (exclude_alloc == 0)
+	{
+	  exclude_alloc = 64;
+	  exclude = (char const **)
+		    xmalloc (exclude_alloc * sizeof (*exclude));
+	}
+      else
+	{
+	  exclude_alloc *= 2;
+	  exclude = (char const **)
+		    xrealloc (exclude, exclude_alloc * sizeof (*exclude));
+	}
+    }
+
+  exclude[exclude_count] = pattern;
+  exclude_count++;
+}
+
+/* Read exclusion patterns, one per line, from the file NAME (standard
+   input if NAME is "-") and pass each line to add_exclude.
+   Return -1 if the file cannot be opened or examined with fstat;
+   otherwise return the result of closing it.
+
+   The patterns are not copied: add_exclude stores pointers into the
+   buffer that slurp fills, so that buffer is not freed here.  */
+
+static int
+add_exclude_file (name)
+     char const *name;
+{
+  struct file_data f;
+  char *p, *q, *lim;
+
+  /* Record the file that is actually being read.  This used to store
+     the global `optarg', which names the same file only while the
+     caller passes optarg as NAME.  */
+  f.name = name;
+  f.desc = (strcmp (name, "-") == 0
+	    ? STDIN_FILENO
+	    : open (name, O_RDONLY, 0));
+  if (f.desc < 0 || fstat (f.desc, &f.stat) != 0)
+    return -1;
+
+  sip (&f, 1);
+  slurp (&f);
+
+  /* Split the buffer into null-terminated lines in place, registering
+     each one as a pattern.  When the last line lacks a newline, Q is set
+     to LIM and the terminator is stored at *LIM -- this assumes the
+     buffer has at least one byte of slack past buffered_chars; TODO
+     confirm against slurp.  */
+  for (p = f.buffer, lim = p + f.buffered_chars;  p < lim;  p = q)
+    {
+      q = (char *) memchr (p, '\n', lim - p);
+      if (!q)
+	q = lim;
+      *q++ = 0;
+      add_exclude (p);
+    }
+
+  return close (f.desc);
+}
+
+/* Long option table passed to getopt_long in `main'.
+   Each entry lists the option name, a code for whether it takes an
+   argument (judging by the entries: 0 = none, 1 = required, 2 = optional,
+   as used by "context" and "unified" -- confirm against getopt.h),
+   a field that is 0 in every entry here, and the value that the switch
+   in `main' receives for the option.
+
+   The numbers 129- that appear in the fourth element of some entries
+   tell the big switch in `main' how to process those options.  */
+
+static struct option const longopts[] =
+{
+  {"ignore-blank-lines", 0, 0, 'B'},
+  {"context", 2, 0, 'C'},
+  {"ifdef", 1, 0, 'D'},
+  {"show-function-line", 1, 0, 'F'},
+  {"speed-large-files", 0, 0, 'H'},
+  {"ignore-matching-lines", 1, 0, 'I'},
+  {"label", 1, 0, 'L'},
+  {"file-label", 1, 0, 'L'},	/* An alias, no longer recommended */
+  {"new-file", 0, 0, 'N'},
+  {"entire-new-file", 0, 0, 'N'},	/* An alias, no longer recommended */
+  {"unidirectional-new-file", 0, 0, 'P'},
+  {"starting-file", 1, 0, 'S'},
+  {"initial-tab", 0, 0, 'T'},
+  {"width", 1, 0, 'W'},
+  {"text", 0, 0, 'a'},
+  {"ascii", 0, 0, 'a'},		/* An alias, no longer recommended */
+  {"ignore-space-change", 0, 0, 'b'},
+  {"minimal", 0, 0, 'd'},
+  {"ed", 0, 0, 'e'},
+  {"forward-ed", 0, 0, 'f'},
+  {"ignore-case", 0, 0, 'i'},
+  {"paginate", 0, 0, 'l'},
+  {"print", 0, 0, 'l'},		/* An alias, no longer recommended */
+  {"rcs", 0, 0, 'n'},
+  {"show-c-function", 0, 0, 'p'},
+  {"brief", 0, 0, 'q'},
+  {"recursive", 0, 0, 'r'},
+  {"report-identical-files", 0, 0, 's'},
+  {"expand-tabs", 0, 0, 't'},
+  {"version", 0, 0, 'v'},
+  {"ignore-all-space", 0, 0, 'w'},
+  {"exclude", 1, 0, 'x'},
+  {"exclude-from", 1, 0, 'X'},
+  {"side-by-side", 0, 0, 'y'},
+  {"unified", 2, 0, 'U'},
+  {"left-column", 0, 0, 129},
+  {"suppress-common-lines", 0, 0, 130},
+  {"sdiff-merge-assist", 0, 0, 131},
+  {"old-line-format", 1, 0, 132},
+  {"new-line-format", 1, 0, 133},
+  {"unchanged-line-format", 1, 0, 134},
+  {"line-format", 1, 0, 135},
+  {"old-group-format", 1, 0, 136},
+  {"new-group-format", 1, 0, 137},
+  {"unchanged-group-format", 1, 0, 138},
+  {"changed-group-format", 1, 0, 139},
+  {"horizon-lines", 1, 0, 140},
+  {"help", 0, 0, 141},
+  {"binary", 0, 0, 142},
+  {0, 0, 0, 0}
+};
+
+/* Entry point.  Parse the command-line options (after prepending any
+   options found in the DIFF_OPTIONS environment variable), derive the
+   settings that depend on them, compare the two operands with
+   compare_files, and exit with the value it returns.
+   Exactly two operands are required; otherwise try_help is called.  */
+
+int
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  int val;
+  int c;
+  int prev = -1;
+  int width = DEFAULT_WIDTH;
+  int show_c_function = 0;
+
+#ifdef __FreeBSD__
+  setlocale(LC_ALL, "");
+#endif
+  /* Do our initializations.  */
+  initialize_main (&argc, &argv);
+  program_name = argv[0];
+  output_style = OUTPUT_NORMAL;
+  /* -1 means that no context length has been specified yet.  */
+  context = -1;
+
+  prepend_default_options (getenv ("DIFF_OPTIONS"), &argc, &argv);
+
+  /* Decode the options.  */
+
+  while ((c = getopt_long (argc, argv,
+			   "0123456789abBcC:dD:efF:hHiI:lL:nNopPqrsS:tTuU:vwW:x:X:y",
+			   longopts, 0)) != EOF)
+    {
+      switch (c)
+	{
+	  /* All digits combine in decimal to specify the context-size.  */
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+	case '0':
+	  if (context == -1)
+	    context = 0;
+	  /* If a context length has already been specified,
+	     more digits allowed only if they follow right after the others.
+	     Reject two separate runs of digits, or digits after -C.  */
+	  else if (prev < '0' || prev > '9')
+	    fatal ("context length specified twice");
+
+	  context = context * 10 + c - '0';
+	  break;
+
+	case 'a':
+	  /* Treat all files as text files; never treat as binary.  */
+	  always_text_flag = 1;
+	  break;
+
+	case 'b':
+	  /* Ignore changes in amount of white space.  */
+	  ignore_space_change_flag = 1;
+	  ignore_some_changes = 1;
+	  ignore_some_line_changes = 1;
+	  break;
+
+	case 'B':
+	  /* Ignore changes affecting only blank lines.  */
+	  ignore_blank_lines_flag = 1;
+	  ignore_some_changes = 1;
+	  break;
+
+	case 'C':		/* +context[=lines] */
+	case 'U':		/* +unified[=lines] */
+	  if (optarg)
+	    {
+	      if (context >= 0)
+		fatal ("context length specified twice");
+
+	      if (ck_atoi (optarg, &context))
+		fatal ("invalid context length argument");
+	    }
+
+	  /* Falls through.  */
+	case 'c':
+	  /* Make context-style output.  */
+	  specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
+	  break;
+
+	case 'd':
+	  /* Don't discard lines.  This makes things slower (sometimes much
+	     slower) but will find a guaranteed minimal set of changes.  */
+	  no_discards = 1;
+	  break;
+
+	case 'D':
+	  /* Make merged #ifdef output.  */
+	  specify_style (OUTPUT_IFDEF);
+	  {
+	    int i, err = 0;
+	    /* One template that expands to four group formats.  The three
+	       `%c' conversions are given 0 below, so the expansion is four
+	       consecutive null-terminated strings in a single buffer.  */
+	    static char const C_ifdef_group_formats[] =
+	      "#ifndef %s\n%%<#endif /* not %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c%%=%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
+	    char *b = xmalloc (sizeof (C_ifdef_group_formats)
+			       + 7 * strlen(optarg) - 14 /* 7*"%s" */
+			       - 8 /* 5*"%%" + 3*"%c" */);
+	    sprintf (b, C_ifdef_group_formats,
+		     optarg, optarg, 0,
+		     optarg, optarg, 0, 0,
+		     optarg, optarg, optarg);
+	    /* Hand each of the four strings to its group format slot,
+	       stepping past the null byte that ends each one.  */
+	    for (i = 0; i < 4; i++)
+	      {
+		err |= specify_format (&group_format[i], b);
+		b += strlen (b) + 1;
+	      }
+	    if (err)
+	      error ("conflicting #ifdef formats", 0, 0);
+	  }
+	  break;
+
+	case 'e':
+	  /* Make output that is a valid `ed' script.  */
+	  specify_style (OUTPUT_ED);
+	  break;
+
+	case 'f':
+	  /* Make output that looks vaguely like an `ed' script
+	     but has changes in the order they appear in the file.  */
+	  specify_style (OUTPUT_FORWARD_ED);
+	  break;
+
+	case 'F':
+	  /* Show, for each set of changes, the previous line that
+	     matches the specified regexp.  Currently affects only
+	     context-style output.  */
+	  add_regexp (&function_regexp_list, optarg);
+	  break;
+
+	case 'h':
+	  /* Split the files into chunks of around 1500 lines
+	     for faster processing.  Usually does not change the result.
+
+	     This currently has no effect.  */
+	  break;
+
+	case 'H':
+	  /* Turn on heuristics that speed processing of large files
+	     with a small density of changes.  */
+	  heuristic = 1;
+	  break;
+
+	case 'i':
+	  /* Ignore changes in case.  */
+	  ignore_case_flag = 1;
+	  ignore_some_changes = 1;
+	  ignore_some_line_changes = 1;
+	  break;
+
+	case 'I':
+	  /* Ignore changes affecting only lines that match the
+	     specified regexp.  */
+	  add_regexp (&ignore_regexp_list, optarg);
+	  ignore_some_changes = 1;
+	  break;
+
+	case 'l':
+	  /* Pass the output through `pr' to paginate it.  */
+	  paginate_flag = 1;
+#if !defined(SIGCHLD) && defined(SIGCLD)
+#define SIGCHLD SIGCLD
+#endif
+#ifdef SIGCHLD
+	  /* Pagination requires forking and waiting, and
+	     System V fork+wait does not work if SIGCHLD is ignored.  */
+	  signal (SIGCHLD, SIG_DFL);
+#endif
+	  break;
+
+	case 'L':
+	  /* Specify file labels for `-c' output headers.  */
+	  if (!file_label[0])
+	    file_label[0] = optarg;
+	  else if (!file_label[1])
+	    file_label[1] = optarg;
+	  else
+	    fatal ("too many file label options");
+	  break;
+
+	case 'n':
+	  /* Output RCS-style diffs, like `-f' except that each command
+	     specifies the number of lines affected.  */
+	  specify_style (OUTPUT_RCS);
+	  break;
+
+	case 'N':
+	  /* When comparing directories, if a file appears only in one
+	     directory, treat it as present but empty in the other.  */
+	  entire_new_file_flag = 1;
+	  break;
+
+	case 'o':
+	  /* Output in the old tradition style.  */
+	  specify_style (OUTPUT_NORMAL);
+	  break;
+
+	case 'p':
+	  /* Make context-style output and show name of last C function.  */
+	  show_c_function = 1;
+	  add_regexp (&function_regexp_list, "^[_a-zA-Z$]");
+	  break;
+
+	case 'P':
+	  /* When comparing directories, if a file appears only in
+	     the second directory of the two,
+	     treat it as present but empty in the other.  */
+	  unidirectional_new_file_flag = 1;
+	  break;
+
+	case 'q':
+	  no_details_flag = 1;
+	  break;
+
+	case 'r':
+	  /* When comparing directories,
+	     recursively compare any subdirectories found.  */
+	  recursive = 1;
+	  break;
+
+	case 's':
+	  /* Print a message if the files are the same.  */
+	  print_file_same_flag = 1;
+	  break;
+
+	case 'S':
+	  /* When comparing directories, start with the specified
+	     file name.  This is used for resuming an aborted comparison.  */
+	  dir_start_file = optarg;
+	  break;
+
+	case 't':
+	  /* Expand tabs to spaces in the output so that it preserves
+	     the alignment of the input files.  */
+	  tab_expand_flag = 1;
+	  break;
+
+	case 'T':
+	  /* Use a tab in the output, rather than a space, before the
+	     text of an input line, so as to keep the proper alignment
+	     in the input line without changing the characters in it.  */
+	  tab_align_flag = 1;
+	  break;
+
+	case 'u':
+	  /* Output the context diff in unidiff format.  */
+	  specify_style (OUTPUT_UNIFIED);
+	  break;
+
+	case 'v':
+	  /* Print the version and exit at once, with status 0.  */
+	  printf ("diff - GNU diffutils version %s\n", version_string);
+	  exit (0);
+
+	case 'w':
+	  /* Ignore horizontal white space when comparing lines.  */
+	  ignore_all_space_flag = 1;
+	  ignore_some_changes = 1;
+	  ignore_some_line_changes = 1;
+	  break;
+
+	case 'x':
+	  /* Exclude file names matching the given pattern.  */
+	  add_exclude (optarg);
+	  break;
+
+	case 'X':
+	  /* Exclude file names matching any pattern listed in a file.  */
+	  if (add_exclude_file (optarg) != 0)
+	    pfatal_with_name (optarg);
+	  break;
+
+	case 'y':
+	  /* Use side-by-side (sdiff-style) columnar output. */
+	  specify_style (OUTPUT_SDIFF);
+	  break;
+
+	case 'W':
+	  /* Set the line width for OUTPUT_SDIFF.  */
+	  if (ck_atoi (optarg, &width) || width <= 0)
+	    fatal ("column width must be a positive integer");
+	  break;
+
+	case 129:
+	  /* --left-column */
+	  sdiff_left_only = 1;
+	  break;
+
+	case 130:
+	  /* --suppress-common-lines */
+	  sdiff_skip_common_lines = 1;
+	  break;
+
+	case 131:
+	  /* sdiff-style columns output. */
+	  specify_style (OUTPUT_SDIFF);
+	  sdiff_help_sdiff = 1;
+	  break;
+
+	case 132:
+	case 133:
+	case 134:
+	  /* --old-line-format, --new-line-format, --unchanged-line-format:
+	     the option value minus 132 indexes line_format.  */
+	  specify_style (OUTPUT_IFDEF);
+	  if (specify_format (&line_format[c - 132], optarg) != 0)
+	    error ("conflicting line format", 0, 0);
+	  break;
+
+	case 135:
+	  /* --line-format: apply one format to every line_format slot.  */
+	  specify_style (OUTPUT_IFDEF);
+	  {
+	    int i, err = 0;
+	    for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
+	      err |= specify_format (&line_format[i], optarg);
+	    if (err)
+	      error ("conflicting line format", 0, 0);
+	  }
+	  break;
+
+	case 136:
+	case 137:
+	case 138:
+	case 139:
+	  /* --old/new/unchanged/changed-group-format:
+	     the option value minus 136 indexes group_format.  */
+	  specify_style (OUTPUT_IFDEF);
+	  if (specify_format (&group_format[c - 136], optarg) != 0)
+	    error ("conflicting group format", 0, 0);
+	  break;
+
+	case 140:
+	  /* --horizon-lines */
+	  if (ck_atoi (optarg, &horizon_lines) || horizon_lines < 0)
+	    fatal ("horizon must be a nonnegative integer");
+	  break;
+
+	case 141:
+	  /* --help */
+	  usage ();
+	  check_stdout ();
+	  exit (0);
+
+	case 142:
+	  /* Use binary I/O when reading and writing data.
+	     On Posix hosts, this has no effect.  */
+#if HAVE_SETMODE
+	  binary_I_O = 1;
+	  setmode (STDOUT_FILENO, O_BINARY);
+#endif
+	  break;
+
+	default:
+	  try_help (0);
+	}
+      /* Remember this option so that the digit cases above can tell
+	 whether a digit directly follows another digit.  */
+      prev = c;
+    }
+
+  if (argc - optind != 2)
+    try_help (argc - optind < 2 ? "missing operand" : "extra operand");
+
+
+  {
+    /*
+     *	We maximize first the half line width, and then the gutter width,
+     *	according to the following constraints:
+     *	1.  Two half lines plus a gutter must fit in a line.
+     *	2.  If the half line width is nonzero:
+     *	    a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
+     *	    b.  If tabs are not expanded to spaces,
+     *		a half line plus a gutter is an integral number of tabs,
+     *		so that tabs in the right column line up.
+     */
+    int t = tab_expand_flag ? 1 : TAB_WIDTH;
+    int off = (width + t + GUTTER_WIDTH_MINIMUM) / (2*t)  *  t;
+    /* NOTE: the next line ends in a comma, not a semicolon, so the two
+       assignments form one comma expression; both are still evaluated,
+       in order.  */
+    sdiff_half_width = max (0, min (off - GUTTER_WIDTH_MINIMUM, width - off)),
+    sdiff_column2_offset = sdiff_half_width ? off : width;
+  }
+
+  /* -p selects context output unless unified output was requested.  */
+  if (show_c_function && output_style != OUTPUT_UNIFIED)
+    specify_style (OUTPUT_CONTEXT);
+
+  if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED)
+    context = 0;
+  else if (context == -1)
+    /* Default amount of context for -c.  */
+    context = 3;
+
+  if (output_style == OUTPUT_IFDEF)
+    {
+      /* Format arrays are char *, not char const *,
+	 because integer formats are temporarily modified.
+	 But it is safe to assign a constant like "%=" to a format array,
+	 since "%=" does not format any integers.  */
+      int i;
+      for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
+	if (!line_format[i])
+	  line_format[i] = "%l\n";
+      if (!group_format[OLD])
+	group_format[OLD]
+	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%<";
+      if (!group_format[NEW])
+	group_format[NEW]
+	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%>";
+      if (!group_format[UNCHANGED])
+	group_format[UNCHANGED] = "%=";
+      if (!group_format[CHANGED])
+	group_format[CHANGED] = concat (group_format[OLD],
+					group_format[NEW], "");
+    }
+
+  no_diff_means_no_output =
+    (output_style == OUTPUT_IFDEF ?
+      (!*group_format[UNCHANGED]
+       || (strcmp (group_format[UNCHANGED], "%=") == 0
+	   && !*line_format[UNCHANGED]))
+     : output_style == OUTPUT_SDIFF ? sdiff_skip_common_lines : 1);
+
+  /* Join the arguments that precede the operands
+     (argv[1] through argv[optind - 1]) into one string.  */
+  switch_string = option_list (argv + 1, optind - 1);
+
+  val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
+
+  /* Print any messages that were saved up for last.  */
+  print_message_queue ();
+
+  check_stdout ();
+  exit (val);
+  return val;
+}
+
+/* Compile the regular expression PATTERN and push the result onto the
+   front of the list *REGLIST.  If compilation reports a problem, it is
+   passed to `error' together with the pattern; the new entry is linked
+   into the list either way.  */
+
+static void
+add_regexp (reglist, pattern)
+     struct regexp_list **reglist;
+     char const *pattern;
+{
+  struct regexp_list *node = (struct regexp_list *) xmalloc (sizeof (*node));
+  char const *errmsg;
+
+  /* Start from an all-zero entry that owns a 256-byte fastmap.  */
+  bzero (node, sizeof (*node));
+  node->buf.fastmap = xmalloc (256);
+
+  errmsg = re_compile_pattern (pattern, strlen (pattern), &node->buf);
+  if (errmsg != 0)
+    error ("%s: %s", pattern, errmsg);
+
+  /* Link at the head: simpler than walking to the tail.  */
+  node->next = *reglist;
+  *reglist = node;
+}
+
+static void
+try_help (reason)
+     char const *reason;
+{
+  if (reason)
+    error ("%s", reason, 0);
+  error ("Try `%s --help' for more information.", program_name, 0);
+  exit (2);
+}
+
+/* Close standard output and call `fatal' if it had already recorded an
+   error or if closing it fails.  The stream is not closed when it
+   already has an error recorded.  */
+
+static void
+check_stdout ()
+{
+  int failed = ferror (stdout) != 0;
+
+  if (!failed)
+    failed = fclose (stdout) != 0;
+
+  if (failed)
+    fatal ("write error");
+}
+
+/* Help text printed by `usage', one string per output line, ended by a
+   null pointer.  `usage' indents every string by two spaces and appends
+   a newline, so a string that itself ends in "\n" is followed by a
+   blank line.
+
+   Keep the text in step with the option handling in `main': the default
+   context is 3 lines for both -c and -u, and the width option is -W
+   (-w is --ignore-all-space).  */
+
+static char const * const option_help[] = {
+"-i  --ignore-case  Consider upper- and lower-case to be the same.",
+"-w  --ignore-all-space  Ignore all white space.",
+"-b  --ignore-space-change  Ignore changes in the amount of white space.",
+"-B  --ignore-blank-lines  Ignore changes whose lines are all blank.",
+"-I RE  --ignore-matching-lines=RE  Ignore changes whose lines all match RE.",
+#if HAVE_SETMODE
+"--binary  Read and write data in binary mode.",
+#endif
+"-a  --text  Treat all files as text.\n",
+"-c  -C NUM  --context[=NUM]  Output NUM (default 3) lines of copied context.",
+"-u  -U NUM  --unified[=NUM]  Output NUM (default 3) lines of unified context.",
+"  -NUM  Use NUM context lines.",
+"  -L LABEL  --label LABEL  Use LABEL instead of file name.",
+"  -p  --show-c-function  Show which C function each change is in.",
+"  -F RE  --show-function-line=RE  Show the most recent line matching RE.",
+"-q  --brief  Output only whether files differ.",
+"-e  --ed  Output an ed script.",
+"-n  --rcs  Output an RCS format diff.",
+"-y  --side-by-side  Output in two columns.",
+"  -W NUM  --width=NUM  Output at most NUM (default 130) characters per line.",
+"  --left-column  Output only the left column of common lines.",
+"  --suppress-common-lines  Do not output common lines.",
+"-DNAME  --ifdef=NAME  Output merged file to show `#ifdef NAME' diffs.",
+"--GTYPE-group-format=GFMT  Similar, but format GTYPE input groups with GFMT.",
+"--line-format=LFMT  Similar, but format all input lines with LFMT.",
+"--LTYPE-line-format=LFMT  Similar, but format LTYPE input lines with LFMT.",
+"  LTYPE is `old', `new', or `unchanged'.  GTYPE is LTYPE or `changed'.",
+"  GFMT may contain:",
+"    %<  lines from FILE1",
+"    %>  lines from FILE2",
+"    %=  lines common to FILE1 and FILE2",
+"    %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER",
+"      LETTERs are as follows for new group, lower case for old group:",
+"        F  first line number",
+"        L  last line number",
+"        N  number of lines = L-F+1",
+"        E  F-1",
+"        M  L+1",
+"  LFMT may contain:",
+"    %L  contents of line",
+"    %l  contents of line, excluding any trailing newline",
+"    %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number",
+"  Either GFMT or LFMT may contain:",
+"    %%  %",
+"    %c'C'  the single character C",
+"    %c'\\OOO'  the character with octal code OOO\n",
+"-l  --paginate  Pass the output through `pr' to paginate it.",
+"-t  --expand-tabs  Expand tabs to spaces in output.",
+"-T  --initial-tab  Make tabs line up by prepending a tab.\n",
+"-r  --recursive  Recursively compare any subdirectories found.",
+"-N  --new-file  Treat absent files as empty.",
+"-P  --unidirectional-new-file  Treat absent first files as empty.",
+"-s  --report-identical-files  Report when two files are the same.",
+"-x PAT  --exclude=PAT  Exclude files that match PAT.",
+"-X FILE  --exclude-from=FILE  Exclude files that match any pattern in FILE.",
+"-S FILE  --starting-file=FILE  Start with FILE when comparing directories.\n",
+"--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix.",
+"-d  --minimal  Try hard to find a smaller set of changes.",
+"-H  --speed-large-files  Assume large files and many scattered small changes.\n",
+"-v  --version  Output version info.",
+"--help  Output this help.",
+0
+};
+
+/* Print the usage summary on standard output: a synopsis line, every
+   entry of option_help indented by two spaces, and a closing note
+   about `-' standing for standard input.  */
+
+static void
+usage ()
+{
+  int k;
+
+  printf ("Usage: %s [OPTION]... FILE1 FILE2\n\n", program_name);
+
+  for (k = 0; option_help[k] != 0; k++)
+    printf ("  %s\n", option_help[k]);
+
+  printf ("\nIf FILE1 or FILE2 is `-', read standard input.\n");
+}
+
+/* Store VALUE in the format slot *VAR.
+   Return 0 if the slot was empty or already held an equal string;
+   otherwise return the nonzero result of comparing the old string with
+   VALUE.  The slot is overwritten in every case.  */
+
+static int
+specify_format (var, value)
+     char **var;
+     char *value;
+{
+  int conflict = 0;
+
+  if (*var != 0)
+    conflict = strcmp (*var, value);
+
+  *var = value;
+  return conflict;
+}
+
+/* Select STYLE as the output style.  If a style other than
+   OUTPUT_NORMAL was already selected and differs from STYLE, report a
+   conflict through `error'; STYLE is stored regardless.  */
+
+static void
+specify_style (style)
+     enum output_style style;
+{
+  if (! (output_style == OUTPUT_NORMAL || output_style == style))
+    {
+      error ("conflicting specifications of output style", 0, 0);
+    }
+
+  output_style = style;
+}
+
+/* Return a description of the type of the file whose status is *ST,
+   for use in diagnostics.
+
+   See Posix.2 section 4.17.6.1.1 and Table 5-1 for these formats.
+   To keep diagnostics grammatical, the returned string must start
+   with a consonant.  */
+
+static char const *
+filetype (st)
+     struct stat const *st;
+{
+  char const *kind = "weird file";
+
+  if (S_ISREG (st->st_mode))
+    {
+      /* Posix.2 section 5.14.2 seems to suggest reading the file to
+	 guess whether it is C, Fortran, etc.  That is of little use and
+	 is not historical practice; guessing wrong is allowed, so the
+	 file is not read.  */
+      kind = st->st_size == 0 ? "regular empty file" : "regular file";
+    }
+  else if (S_ISDIR (st->st_mode))
+    kind = "directory";
+
+  /* The remaining Posix.1 file types.  */
+#ifdef S_ISBLK
+  else if (S_ISBLK (st->st_mode))
+    kind = "block special file";
+#endif
+#ifdef S_ISCHR
+  else if (S_ISCHR (st->st_mode))
+    kind = "character special file";
+#endif
+#ifdef S_ISFIFO
+  else if (S_ISFIFO (st->st_mode))
+    kind = "fifo";
+#endif
+
+  /* The Posix.1b file types.  */
+#ifdef S_TYPEISMQ
+  else if (S_TYPEISMQ (st))
+    kind = "message queue";
+#endif
+#ifdef S_TYPEISSEM
+  else if (S_TYPEISSEM (st))
+    kind = "semaphore";
+#endif
+#ifdef S_TYPEISSHM
+  else if (S_TYPEISSHM (st))
+    kind = "shared memory object";
+#endif
+
+  /* Other popular file types.
+     S_ISLNK is impossible with `fstat' and `stat'.  */
+#ifdef S_ISSOCK
+  else if (S_ISSOCK (st->st_mode))
+    kind = "socket";
+#endif
+
+  return kind;
+}
+
+/* Compare two files (or dirs) with specified names
+   DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion.
+   (if DIR0 is 0, then the name is just NAME0, etc.)
+   This is self-contained; it opens the files and closes them.
+
+   NAME0 or NAME1 may be 0 to stand for a file that does not exist on
+   that side; the file is then reported as "Only in ..." unless
+   the -N or -P flags ask for it to be treated as present.
+
+   Value is 0 if files are the same, 1 if different,
+   2 if there is a problem opening them.  */
+
+static int
+compare_files (dir0, name0, dir1, name1, depth)
+     char const *dir0, *dir1;
+     char const *name0, *name1;
+     int depth;
+{
+  struct file_data inf[2];
+  register int i;
+  int val;
+  int same_files;
+  int failed = 0;
+  /* Pathnames built by dir_file_pathname below, freed before returning.  */
+  char *free0 = 0, *free1 = 0;
+
+  /* If this is directory comparison, perhaps we have a file
+     that exists only in one of the directories.
+     If so, just print a message to that effect.  */
+
+  if (! ((name0 != 0 && name1 != 0)
+	 || (unidirectional_new_file_flag && name1 != 0)
+	 || entire_new_file_flag))
+    {
+      char const *name = name0 == 0 ? name1 : name0;
+      char const *dir = name0 == 0 ? dir1 : dir0;
+      message ("Only in %s: %s\n", dir, name);
+      /* Return 1 so that diff_dirs will return 1 ("some files differ").  */
+      return 1;
+    }
+
+  bzero (inf, sizeof (inf));
+
+  /* Mark any nonexistent file with -1 in the desc field.  */
+  /* Mark unopened files (e.g. directories) with -2. */
+
+  inf[0].desc = name0 == 0 ? -1 : -2;
+  inf[1].desc = name1 == 0 ? -1 : -2;
+
+  /* Now record the full name of each file, including nonexistent ones.  */
+
+  if (name0 == 0)
+    name0 = name1;
+  if (name1 == 0)
+    name1 = name0;
+
+  inf[0].name = dir0 == 0 ? name0 : (free0 = dir_file_pathname (dir0, name0));
+  inf[1].name = dir1 == 0 ? name1 : (free1 = dir_file_pathname (dir1, name1));
+
+  /* Stat the files.  Record whether they are directories.  */
+
+  for (i = 0; i <= 1; i++)
+    {
+      if (inf[i].desc != -1)
+	{
+	  int stat_result;
+
+	  /* The second file has the same name as the first:
+	     reuse the status already obtained.  */
+	  if (i && filename_cmp (inf[i].name, inf[0].name) == 0)
+	    {
+	      inf[i].stat = inf[0].stat;
+	      stat_result = 0;
+	    }
+	  else if (strcmp (inf[i].name, "-") == 0)
+	    {
+	      /* "-" means standard input.  */
+	      inf[i].desc = STDIN_FILENO;
+	      stat_result = fstat (STDIN_FILENO, &inf[i].stat);
+	      if (stat_result == 0 && S_ISREG (inf[i].stat.st_mode))
+		{
+		  /* Count only the part of a regular file that lies
+		     after the current read position.  */
+		  off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
+		  if (pos == -1)
+		    stat_result = -1;
+		  else
+		    {
+		      if (pos <= inf[i].stat.st_size)
+			inf[i].stat.st_size -= pos;
+		      else
+			inf[i].stat.st_size = 0;
+		      /* Posix.2 4.17.6.1.4 requires current time for stdin.  */
+		      time (&inf[i].stat.st_mtime);
+		    }
+		}
+	    }
+	  else
+	    stat_result = stat (inf[i].name, &inf[i].stat);
+
+	  if (stat_result != 0)
+	    {
+	      perror_with_name (inf[i].name);
+	      failed = 1;
+	    }
+	  else
+	    {
+	      /* A directory read through descriptor 0 (standard input)
+		 is not treated as a directory.  */
+	      inf[i].dir_p = S_ISDIR (inf[i].stat.st_mode) && inf[i].desc != 0;
+	      /* Give a nonexistent counterpart the same type as this file.  */
+	      if (inf[1 - i].desc == -1)
+		{
+		  inf[1 - i].dir_p = inf[i].dir_p;
+		  inf[1 - i].stat.st_mode = inf[i].stat.st_mode;
+		}
+	    }
+	}
+    }
+
+  if (! failed && depth == 0 && inf[0].dir_p != inf[1].dir_p)
+    {
+      /* If one is a directory, and it was specified in the command line,
+	 use the file in that dir with the other file's basename.  */
+
+      int fnm_arg = inf[0].dir_p;
+      int dir_arg = 1 - fnm_arg;
+      char const *fnm = inf[fnm_arg].name;
+      char const *dir = inf[dir_arg].name;
+      char const *p = filename_lastdirchar (fnm);
+      /* NOTE(review): this pathname is not recorded in free0 or free1,
+	 so it does not appear to be freed by this function.  */
+      char const *filename = inf[dir_arg].name
+	= dir_file_pathname (dir, p ? p + 1 : fnm);
+
+      if (strcmp (fnm, "-") == 0)
+	fatal ("can't compare - to a directory");
+
+      if (stat (filename, &inf[dir_arg].stat) != 0)
+	{
+	  perror_with_name (filename);
+	  failed = 1;
+	}
+      else
+	inf[dir_arg].dir_p = S_ISDIR (inf[dir_arg].stat.st_mode);
+    }
+
+  if (failed)
+    {
+
+      /* If either file should exist but does not, return 2.  */
+
+      val = 2;
+
+    }
+  /* same_files is assigned inside the condition below.  It is read again
+     only in the final `else' branch, which can be reached only after
+     this condition has been evaluated.  */
+  else if ((same_files = inf[0].desc != -1 && inf[1].desc != -1
+			 && 0 < same_file (&inf[0].stat, &inf[1].stat))
+	   && no_diff_means_no_output)
+    {
+      /* The two named files are actually the same physical file.
+	 We know they are identical without actually reading them.  */
+
+      val = 0;
+    }
+  else if (inf[0].dir_p & inf[1].dir_p)
+    {
+      if (output_style == OUTPUT_IFDEF)
+	fatal ("-D option not supported with directories");
+
+      /* If both are directories, compare the files in them.  */
+
+      if (depth > 0 && !recursive)
+	{
+	  /* But don't compare dir contents one level down
+	     unless -r was specified.  */
+	  message ("Common subdirectories: %s and %s\n",
+		   inf[0].name, inf[1].name);
+	  val = 0;
+	}
+      else
+	{
+	  val = diff_dirs (inf, compare_files, depth);
+	}
+
+    }
+  else if ((inf[0].dir_p | inf[1].dir_p)
+	   || (depth > 0
+	       && (! S_ISREG (inf[0].stat.st_mode)
+		   || ! S_ISREG (inf[1].stat.st_mode))))
+    {
+      /* Perhaps we have a subdirectory that exists only in one directory.
+	 If so, just print a message to that effect.  */
+
+      if (inf[0].desc == -1 || inf[1].desc == -1)
+	{
+	  if ((inf[0].dir_p | inf[1].dir_p)
+	      && recursive
+	      && (entire_new_file_flag
+		  || (unidirectional_new_file_flag && inf[0].desc == -1)))
+	    val = diff_dirs (inf, compare_files, depth);
+	  else
+	    {
+	      char const *dir = (inf[0].desc == -1) ? dir1 : dir0;
+	      /* See Posix.2 section 4.17.6.1.1 for this format.  */
+	      message ("Only in %s: %s\n", dir, name0);
+	      val = 1;
+	    }
+	}
+      else
+	{
+	  /* We have two files that are not to be compared.  */
+
+	  /* See Posix.2 section 4.17.6.1.1 for this format.  */
+	  message5 ("File %s is a %s while file %s is a %s\n",
+		    inf[0].name, filetype (&inf[0].stat),
+		    inf[1].name, filetype (&inf[1].stat));
+
+	  /* This is a difference.  */
+	  val = 1;
+	}
+    }
+  /* Only a yes/no answer is wanted and no kind of change is being
+     ignored: files of different sizes differ, so they need not be read.  */
+  else if ((no_details_flag & ~ignore_some_changes)
+	   && inf[0].stat.st_size != inf[1].stat.st_size
+	   && (inf[0].desc == -1 || S_ISREG (inf[0].stat.st_mode))
+	   && (inf[1].desc == -1 || S_ISREG (inf[1].stat.st_mode)))
+    {
+      message ("Files %s and %s differ\n", inf[0].name, inf[1].name);
+      val = 1;
+    }
+  else
+    {
+      /* Both exist and neither is a directory.  */
+
+      /* Open the files and record their descriptors.  */
+
+      if (inf[0].desc == -2)
+	if ((inf[0].desc = open (inf[0].name, O_RDONLY, 0)) < 0)
+	  {
+	    perror_with_name (inf[0].name);
+	    failed = 1;
+	  }
+      /* The `else' below pairs with `if (same_files)': when both names
+	 denote the same file, the first descriptor is shared instead of
+	 opening the file again.  */
+      if (inf[1].desc == -2)
+	if (same_files)
+	  inf[1].desc = inf[0].desc;
+	else if ((inf[1].desc = open (inf[1].name, O_RDONLY, 0)) < 0)
+	  {
+	    perror_with_name (inf[1].name);
+	    failed = 1;
+	  }
+
+#if HAVE_SETMODE
+      if (binary_I_O)
+	for (i = 0; i <= 1; i++)
+	  if (0 <= inf[i].desc)
+	    setmode (inf[i].desc, O_BINARY);
+#endif
+
+      /* Compare the files, if no error was found.  */
+
+      val = failed ? 2 : diff_2_files (inf, depth);
+
+      /* Close the file descriptors.  */
+
+      if (inf[0].desc >= 0 && close (inf[0].desc) != 0)
+	{
+	  perror_with_name (inf[0].name);
+	  val = 2;
+	}
+      /* Do not close a shared descriptor a second time.  */
+      if (inf[1].desc >= 0 && inf[0].desc != inf[1].desc
+	  && close (inf[1].desc) != 0)
+	{
+	  perror_with_name (inf[1].name);
+	  val = 2;
+	}
+    }
+
+  /* Now the comparison has been done, if no error prevented it,
+     and VAL is the value this function will return.  */
+
+  if (val == 0 && !inf[0].dir_p)
+    {
+      if (print_file_same_flag)
+	message ("Files %s and %s are identical\n",
+		 inf[0].name, inf[1].name);
+    }
+  else
+    fflush (stdout);
+
+  if (free0)
+    free (free0);
+  if (free1)
+    free (free1);
+
+  return val;
+}

+ 344 - 0
sys/src/ape/cmd/diff/diff.h

@@ -0,0 +1,344 @@
+/* Shared definitions for GNU DIFF
+   Copyright (C) 1988, 89, 91, 92, 93 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU DIFF; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#include "system.h"
+#include <stdio.h>
+#ifdef __FreeBSD__
+#include <gnuregex.h>
+#else
+#include "regex.h"
+#endif
+
+#define TAB_WIDTH 8
+
+/* Variables for command line options */
+
+#ifndef GDIFF_MAIN
+#define EXTERN extern
+#else
+#define EXTERN
+#endif
+
+enum output_style {
+  /* Default output style.  */
+  OUTPUT_NORMAL,
+  /* Output the differences with lines of context before and after (-c).  */
+  OUTPUT_CONTEXT,
+  /* Output the differences in a unified context diff format (-u). */
+  OUTPUT_UNIFIED,
+  /* Output the differences as commands suitable for `ed' (-e); not a ROBUST_OUTPUT_STYLE.  */
+  OUTPUT_ED,
+  /* Output the diff as a forward ed script (-f); not a ROBUST_OUTPUT_STYLE.  */
+  OUTPUT_FORWARD_ED,
+  /* Like -f, but output a count of changed lines in each "command" (-n). */
+  OUTPUT_RCS,
+  /* Output merged #ifdef'd file (-D).  */
+  OUTPUT_IFDEF,
+  /* Output sdiff style (-y).  */
+  OUTPUT_SDIFF
+};
+
+/* True for output styles that are robust,
+   i.e. can handle a file that ends in a non-newline.  */
+#define ROBUST_OUTPUT_STYLE(S) ((S) != OUTPUT_ED && (S) != OUTPUT_FORWARD_ED)
+
+EXTERN enum output_style output_style;
+
+/* Nonzero if output cannot be generated for identical files.  */
+EXTERN int no_diff_means_no_output;
+
+/* Number of lines of context to show in each set of diffs.
+   This is zero when context is not to be shown.  */
+EXTERN int      context;
+
+/* Consider all files as text files (-a).
+   Don't interpret codes over 0177 as implying a "binary file".  */
+EXTERN int	always_text_flag;
+
+/* Number of lines to keep in identical prefix and suffix.  */
+EXTERN int      horizon_lines;
+
+/* Ignore changes in horizontal white space (-b).  */
+EXTERN int      ignore_space_change_flag;
+
+/* Ignore all horizontal white space (-w).  */
+EXTERN int      ignore_all_space_flag;
+
+/* Ignore changes that affect only blank lines (-B).  */
+EXTERN int      ignore_blank_lines_flag;
+
+/* 1 if lines may match even if their contents do not match exactly.
+   This depends on various options.  */
+EXTERN int      ignore_some_line_changes;
+
+/* 1 if files may match even if their contents are not byte-for-byte identical.
+   This depends on various options.  */
+EXTERN int      ignore_some_changes;
+
+/* Ignore differences in case of letters (-i).  */
+EXTERN int      ignore_case_flag;
+
+/* File labels for `-c' output headers (-L).  */
+EXTERN char *file_label[2];
+
+struct regexp_list	/* One entry in a linked list of regexps (see the -F and -I lists).  */
+{
+  struct re_pattern_buffer buf;	/* Pattern buffer; type presumably comes from the regex header -- confirm.  */
+  struct regexp_list *next;	/* Next entry in the list.  */
+};
+
+/* Regexp to identify function-header lines (-F).  */
+EXTERN struct regexp_list *function_regexp_list;
+
+/* Ignore changes that affect only lines matching this regexp (-I).  */
+EXTERN struct regexp_list *ignore_regexp_list;
+
+/* Say only whether files differ, not how (-q).  */
+EXTERN int 	no_details_flag;
+
+/* Report files compared that match (-s).
+   Normally nothing is output when that happens.  */
+EXTERN int      print_file_same_flag;
+
+/* Output the differences with exactly 8 columns added to each line
+   so that any tabs in the text line up properly (-T).  */
+EXTERN int	tab_align_flag;
+
+/* Expand tabs in the output so the text lines up properly
+   despite the characters added to the front of each line (-t).  */
+EXTERN int	tab_expand_flag;
+
+/* In directory comparison, specify file to start with (-S).
+   All file names less than this name are ignored.  */
+EXTERN char	*dir_start_file;
+
+/* If a file is new (appears in only one dir)
+   include its entire contents (-N).
+   Then `patch' would create the file with appropriate contents.  */
+EXTERN int	entire_new_file_flag;
+
+/* If a file is new (appears in only the second dir)
+   include its entire contents (-P).
+   Then `patch' would create the file with appropriate contents.  */
+EXTERN int	unidirectional_new_file_flag;
+
+/* Pipe each file's output through pr (-l).  */
+EXTERN int	paginate_flag;
+
+enum line_class {
+  /* Lines taken from just the first file.  */
+  OLD,
+  /* Lines taken from just the second file.  */
+  NEW,
+  /* Lines common to both files; UNCHANGED + 1 sizes line_format[].  */
+  UNCHANGED,
+  /* A hunk containing both old and new lines (line groups only); CHANGED + 1 sizes group_format[].  */
+  CHANGED
+};
+
+/* Line group formats for old, new, unchanged, and changed groups.  */
+EXTERN char *group_format[CHANGED + 1];
+
+/* Line formats for old, new, and unchanged lines.  */
+EXTERN char *line_format[UNCHANGED + 1];
+
+/* If using OUTPUT_SDIFF print extra information to help the sdiff filter. */
+EXTERN int sdiff_help_sdiff;
+
+/* Tell OUTPUT_SDIFF to show only the left version of common lines. */
+EXTERN int sdiff_left_only;
+
+/* Tell OUTPUT_SDIFF to not show common lines. */
+EXTERN int sdiff_skip_common_lines;
+
+/* The half line width and column 2 offset for OUTPUT_SDIFF.  */
+EXTERN unsigned sdiff_half_width;
+EXTERN unsigned sdiff_column2_offset;
+
+/* String containing all the command options diff received,
+   with spaces between and at the beginning but none at the end.
+   If there were no options given, this string is empty.  */
+EXTERN char *	switch_string;
+
+/* Nonzero means use heuristics for better speed.  */
+EXTERN int	heuristic;
+
+/* Name of program the user invoked (for error messages).  */
+EXTERN char *program_name;
+
+/* The result of comparison is an "edit script": a chain of `struct change'.
+   Each `struct change' represents one place where some lines are deleted
+   and some are inserted.
+
+   LINE0 and LINE1 are the first affected lines in the two files (origin 0).
+   DELETED is the number of lines deleted here from file 0.
+   INSERTED is the number of lines inserted here in file 1.
+
+   If DELETED is 0 then LINE0 is the number of the line before
+   which the insertion was done; vice versa for INSERTED and LINE1.  */
+
+struct change
+{
+  struct change *link;		/* Previous or next edit command  */
+  int inserted;			/* # lines of file 1 inserted here.  */
+  int deleted;			/* # lines of file 0 deleted here.  */
+  int line0;			/* Line number (origin 0) of 1st deleted line.  */
+  int line1;			/* Line number (origin 0) of 1st inserted line.  */
+  char ignore;			/* Flag used in context.c */
+};
+
+/* Structures that describe the input files.  */
+
+/* Data on one input file being compared.  */
+
+struct file_data {
+    int             desc;	/* File descriptor; diff.c also stores negative sentinel values here  */
+    char const      *name;	/* File name  */
+    struct stat     stat;	/* File status from fstat()  */
+    int             dir_p;	/* nonzero if file is a directory  */
+
+    /* Buffer in which text of file is read.  */
+    char *	    buffer;
+    /* Allocated size of buffer.  */
+    size_t	    bufsize;
+    /* Number of valid characters now in the buffer. */
+    size_t	    buffered_chars;
+
+    /* Array of pointers to lines in the file.  */
+    char const **linbuf;
+
+    /* linbuf_base <= buffered_lines <= valid_lines <= alloc_lines.
+       linbuf[linbuf_base ... buffered_lines - 1] are possibly differing.
+       linbuf[linbuf_base ... valid_lines - 1] contain valid data.
+       linbuf[linbuf_base ... alloc_lines - 1] are allocated.  */
+    int linbuf_base, buffered_lines, valid_lines, alloc_lines;
+
+    /* Pointer to end of prefix of this file to ignore when hashing. */
+    char const *prefix_end;
+
+    /* Count of lines in the prefix.
+       There are this many lines in the file before linbuf[0].  */
+    int prefix_lines;
+
+    /* Pointer to start of suffix of this file to ignore when hashing. */
+    char const *suffix_begin;
+
+    /* Vector, indexed by line number, containing an equivalence code for
+       each line.  It is this vector that is actually compared with that
+       of another file to generate differences. */
+    int		   *equivs;
+
+    /* Vector, like the previous one except that
+       the elements for discarded lines have been squeezed out.  */
+    int		   *undiscarded;
+
+    /* Vector mapping virtual line numbers (not counting discarded lines)
+       to real ones (counting those lines).  Both are origin-0.  */
+    int		   *realindexes;
+
+    /* Total number of nondiscarded lines. */
+    int		    nondiscarded_lines;
+
+    /* Vector, indexed by real origin-0 line number,
+       containing 1 for a line that is an insertion or a deletion.
+       The results of comparison are stored here.  */
+    char	   *changed_flag;
+
+    /* 1 if file ends in a line with no final newline. */
+    int		    missing_newline;
+
+    /* 1 more than the maximum equivalence value used for this or its
+       sibling file. */
+    int equiv_max;
+};
+
+/* Describe the two files currently being compared.  */
+
+EXTERN struct file_data files[2];
+
+/* Stdio stream to output diffs to.  */
+
+EXTERN FILE *outfile;
+
+/* Declare various functions.  */
+
+/* analyze.c */
+int diff_2_files PARAMS((struct file_data[], int));
+
+/* context.c */
+void print_context_header PARAMS((struct file_data[], int));
+void print_context_script PARAMS((struct change *, int));
+
+/* diff.c */
+int excluded_filename PARAMS((char const *));
+
+/* dir.c */
+int diff_dirs PARAMS((struct file_data const[], int (*) PARAMS((char const *, char const *, char const *, char const *, int)), int));
+
+/* ed.c */
+void print_ed_script PARAMS((struct change *));
+void pr_forward_ed_script PARAMS((struct change *));
+
+/* ifdef.c */
+void print_ifdef_script PARAMS((struct change *));
+
+/* io.c */
+int read_files PARAMS((struct file_data[], int));
+int sip PARAMS((struct file_data *, int));
+void slurp PARAMS((struct file_data *));
+
+/* normal.c */
+void print_normal_script PARAMS((struct change *));
+
+/* rcs.c */
+void print_rcs_script PARAMS((struct change *));
+
+/* side.c */
+void print_sdiff_script PARAMS((struct change *));
+
+/* util.c */
+VOID *xmalloc PARAMS((size_t));
+VOID *xrealloc PARAMS((VOID *, size_t));
+char *concat PARAMS((char const *, char const *, char const *));
+char *dir_file_pathname PARAMS((char const *, char const *));
+int change_letter PARAMS((int, int));
+int line_cmp PARAMS((char const *, char const *));
+int translate_line_number PARAMS((struct file_data const *, int));
+struct change *find_change PARAMS((struct change *));
+struct change *find_reverse_change PARAMS((struct change *));
+void analyze_hunk PARAMS((struct change *, int *, int *, int *, int *, int *, int *));
+void begin_output PARAMS((void));
+void debug_script PARAMS((struct change *));
+void error PARAMS((char const *, char const *, char const *));
+void fatal PARAMS((char const *));
+void finish_output PARAMS((void));
+void message PARAMS((char const *, char const *, char const *));
+void message5 PARAMS((char const *, char const *, char const *, char const *, char const *));
+void output_1_line PARAMS((char const *, char const *, char const *, char const *));
+void perror_with_name PARAMS((char const *));
+void pfatal_with_name PARAMS((char const *));
+void print_1_line PARAMS((char const *, char const * const *));
+void print_message_queue PARAMS((void));
+void print_number_range PARAMS((int, struct file_data *, int, int));
+void print_script PARAMS((struct change *, struct change * (*) PARAMS((struct change *)), void (*) PARAMS((struct change *))));
+void setup_output PARAMS((char const *, char const *, int));
+void translate_range PARAMS((struct file_data const *, int, int, int *, int *));
+
+/* version.c */
+extern char const version_string[];

+ 3916 - 0
sys/src/ape/cmd/diff/diff.texi

@@ -0,0 +1,3916 @@
+\input texinfo @c -*-texinfo-*-
+@c %**start of header
+@setfilename diff.info
+@settitle Comparing and Merging Files
+@setchapternewpage odd
+@c %**end of header
+
+@ifinfo
+This file documents the GNU @code{diff}, @code{diff3}, @code{sdiff},
+and @code{cmp} commands for showing the differences between text files
+and the @code{patch} command for using their output to update files.
+
+Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+@ignore
+Permission is granted to process this file through TeX and print the
+results, provided the printed document carries copying permission
+notice identical to this one except for the removal of this paragraph
+(this paragraph not being relevant to the printed manual).
+
+@end ignore
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the entire
+resulting derived work is distributed under the terms of a permission
+notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that this permission notice may be stated in a translation approved
+by the Foundation.
+@end ifinfo
+
+@titlepage
+@title Comparing and Merging Files
+@subtitle @code{diff}, @code{diff3}, @code{sdiff}, @code{cmp}, and @code{patch}
+@subtitle Edition 1.3, for @code{diff} 2.5 and @code{patch} 2.1
+@subtitle September 1993
+@author by David MacKenzie, Paul Eggert, and Richard Stallman
+
+@page
+@vskip 0pt plus 1filll
+Copyright @copyright{} 1992, 1993, 1994 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the entire
+resulting derived work is distributed under the terms of a permission
+notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that this permission notice may be stated in a translation approved
+by the Foundation.
+@end titlepage
+
+@node Top, , , (dir)
+
+@ifinfo
+This file documents the GNU @code{diff}, @code{diff3}, @code{sdiff},
+and @code{cmp} commands for showing the differences between text files
+and the @code{patch} command for using their output to update files.
+
+This is Edition 1.3, for @code{diff} 2.5 and @code{patch} 2.1.
+@end ifinfo
+
+@menu
+* Overview::		Preliminary information.
+
+* Comparison::		What file comparison means.
+* Output Formats::	Formats for difference reports.
+* Comparing Directories::	Comparing files and directories.
+* Adjusting Output::	Making @code{diff} output prettier.
+* diff Performance::	Making @code{diff} smarter or faster.
+* Comparing Three Files:: Formats for three-way difference reports.
+
+* diff3 Merging::	Merging from a common ancestor.
+* Interactive Merging::	Interactive merging with @code{sdiff}.
+* Merging with patch::	Using @code{patch} to change old files into new ones.
+* Making Patches::	Tips for making patch distributions.
+
+* Invoking cmp::	How to run @code{cmp} and a summary of its options.
+* Invoking diff::	How to run @code{diff} and a summary of its options.
+* Invoking diff3::	How to run @code{diff3} and a summary of its options.
+* Invoking patch::	How to run @code{patch} and a summary of its options.
+* Invoking sdiff::	How to run @code{sdiff} and a summary of its options.
+
+* Incomplete Lines::	Lines that lack trailing newlines.
+* Projects::		If you think you've found a bug or other shortcoming.
+
+* Concept Index::	Index of concepts.
+@end menu
+
+@node Overview, Comparison, , Top
+@unnumbered Overview
+@cindex overview of @code{diff} and @code{patch}
+
+Computer users often find occasion to ask how two files differ.  Perhaps
+one file is a newer version of the other file.  Or maybe the two files
+started out as identical copies but were changed by different people.
+
+You can use the @code{diff} command to show differences between two
+files, or each corresponding file in two directories.  @code{diff}
+outputs differences between files line by line in any of several
+formats, selectable by command line options.  This set of differences is
+often called a @dfn{diff} or @dfn{patch}.  For files that are identical,
+@code{diff} normally produces no output; for binary (non-text) files,
+@code{diff} normally reports only that they are different.
+
+You can use the @code{cmp} command to show the offsets and line numbers
+where two files differ.  @code{cmp} can also show all the characters
+that differ between the two files, side by side.  Another way to compare
+two files character by character is the Emacs command @kbd{M-x
+compare-windows}.  @xref{Other Window, , Other Window, emacs, The GNU
+Emacs Manual}, for more information on that command.
+
+You can use the @code{diff3} command to show differences among three
+files.  When two people have made independent changes to a common
+original, @code{diff3} can report the differences between the original
+and the two changed versions, and can produce a merged file that
+contains both persons' changes together with warnings about conflicts.
+
+You can use the @code{sdiff} command to merge two files interactively.
+
+You can use the set of differences produced by @code{diff} to distribute
+updates to text files (such as program source code) to other people.
+This method is especially useful when the differences are small compared
+to the complete files.  Given @code{diff} output, you can use the
+@code{patch} program to update, or @dfn{patch}, a copy of the file.  If you
+think of @code{diff} as subtracting one file from another to produce
+their difference, you can think of @code{patch} as adding the difference
+to one file to reproduce the other.
+
+This manual first concentrates on making diffs, and later shows how to
+use diffs to update files.
+
+GNU @code{diff} was written by Mike Haertel, David Hayes, Richard
+Stallman, Len Tower, and Paul Eggert.  Wayne Davison designed and
+implemented the unified output format.  The basic algorithm is described
+in ``An O(ND) Difference Algorithm and its Variations'', Eugene W. Myers,
+@cite{Algorithmica} Vol.@: 1 No.@: 2, 1986, pp.@: 251--266; and in ``A File
+Comparison Program'', Webb Miller and Eugene W. Myers,
+@cite{Software---Practice and Experience} Vol.@: 15 No.@: 11, 1985,
+pp.@: 1025--1040.
+@c From: "Gene Myers" <gene@cs.arizona.edu>
+@c They are about the same basic algorithm; the Algorithmica
+@c paper gives a rigorous treatment and the sub-algorithm for
+@c delivering scripts and should be the primary reference, but
+@c both should be mentioned.
+The algorithm was independently discovered as described in
+``Algorithms for Approximate String Matching'',
+E. Ukkonen, @cite{Information and Control} Vol.@: 64, 1985, pp.@: 100--118.
+@c From: "Gene Myers" <gene@cs.arizona.edu>
+@c Date: Wed, 29 Sep 1993 08:27:55 MST
+@c Ukkonen should be given credit for also discovering the algorithm used
+@c in GNU diff.
+
+GNU @code{diff3} was written by Randy Smith.  GNU @code{sdiff} was
+written by Thomas Lord.  GNU @code{cmp} was written by Torbjorn Granlund
+and David MacKenzie.
+
+@code{patch} was written mainly by Larry Wall; the GNU enhancements were
+written mainly by Wayne Davison and David MacKenzie.  Parts of this
+manual are adapted from a manual page written by Larry Wall, with his
+permission.
+
+@node Comparison, Output Formats, Overview, Top
+@chapter What Comparison Means
+@cindex introduction
+
+There are several ways to think about the differences between two files.
+One way to think of the differences is as a series of lines that were
+deleted from, inserted in, or changed in one file to produce the other
+file.  @code{diff} compares two files line by line, finds groups of
+lines that differ, and reports each group of differing lines.  It can
+report the differing lines in several formats, which have different
+purposes.
+
+GNU @code{diff} can show whether files are different without detailing
+the differences.  It also provides ways to suppress certain kinds of
+differences that are not important to you.  Most commonly, such
+differences are changes in the amount of white space between words or
+lines.  @code{diff} also provides ways to suppress differences in
+alphabetic case or in lines that match a regular expression that you
+provide.  These options can accumulate; for example, you can ignore
+changes in both white space and alphabetic case.
+
+Another way to think of the differences between two files is as a
+sequence of pairs of characters that can be either identical or
+different.  @code{cmp} reports the differences between two files
+character by character, instead of line by line.  As a result, it is
+more useful than @code{diff} for comparing binary files.  For text
+files, @code{cmp} is useful mainly when you want to know only whether
+two files are identical.
+
+To illustrate the effect that considering changes character by character
+can have compared with considering them line by line, think of what
+happens if a single newline character is added to the beginning of a
+file.  If that file is then compared with an otherwise identical file
+that lacks the newline at the beginning, @code{diff} will report that a
+blank line has been added to the file, while @code{cmp} will report that
+almost every character of the two files differs.
+
+@code{diff3} normally compares three input files line by line, finds
+groups of lines that differ, and reports each group of differing lines.
+Its output is designed to make it easy to inspect two different sets of
+changes to the same file.
+
+@menu
+* Hunks::		Groups of differing lines.
+* White Space::		Suppressing differences in white space.
+* Blank Lines::		Suppressing differences in blank lines.
+* Case Folding::	Suppressing differences in alphabetic case.
+* Specified Folding::	Suppressing differences that match regular expressions.
+* Brief::		Summarizing which files are different.
+* Binary::		Comparing binary files or forcing text comparisons.
+@end menu
+
+@node Hunks, White Space, , Comparison
+@section Hunks
+@cindex hunks
+
+When comparing two files, @code{diff} finds sequences of lines common to
+both files, interspersed with groups of differing lines called
+@dfn{hunks}.  Comparing two identical files yields one sequence of
+common lines and no hunks, because no lines differ.  Comparing two
+entirely different files yields no common lines and one large hunk that
+contains all lines of both files.  In general, there are many ways to
+match up lines between two given files.  @code{diff} tries to minimize
+the total hunk size by finding large sequences of common lines
+interspersed with small hunks of differing lines.
+
+For example, suppose the file @file{F} contains the three lines
+@samp{a}, @samp{b}, @samp{c}, and the file @file{G} contains the same
+three lines in reverse order @samp{c}, @samp{b}, @samp{a}.  If
+@code{diff} finds the line @samp{c} as common, then the command
+@samp{diff F G} produces this output:
+
+@example
+1,2d0
+< a
+< b
+3a2,3
+> b
+> a
+@end example
+
+@noindent
+But if @code{diff} notices the common line @samp{b} instead, it produces
+this output:
+
+@example
+1c1
+< a
+---
+> c
+3c3
+< c
+---
+> a
+@end example
+
+@noindent
+It is also possible to find @samp{a} as the common line.  @code{diff}
+does not always find an optimal matching between the files; it takes
+shortcuts to run faster.  But its output is usually close to the
+shortest possible.  You can adjust this tradeoff with the
+@samp{--minimal} option (@pxref{diff Performance}).
+
+@node White Space, Blank Lines, Hunks, Comparison
+@section Suppressing Differences in Blank and Tab Spacing
+@cindex blank and tab difference suppression
+@cindex tab and blank difference suppression
+
+The @samp{-b} and @samp{--ignore-space-change} options ignore white space
+at line end, and consider all other sequences of one or more
+white space characters to be equivalent.  With these options,
+@code{diff} considers the following two lines to be equivalent, where
+@samp{$} denotes the line end:
+
+@example
+Here lyeth  muche rychnesse  in lytell space.   -- John Heywood$
+Here lyeth muche rychnesse in lytell space. -- John Heywood   $
+@end example
+
+The @samp{-w} and @samp{--ignore-all-space} options are stronger than
+@samp{-b}.  They ignore differences even if one file has white space where
+the other file has none.  @dfn{White space} characters include
+tab, newline, vertical tab, form feed, carriage return, and space;
+some locales may define additional characters to be white space.
+With these options, @code{diff} considers the
+following two lines to be equivalent, where @samp{$} denotes the line
+end and @samp{^M} denotes a carriage return:
+
+@example
+Here lyeth  muche  rychnesse in lytell space.--  John Heywood$
+  He relyeth much erychnes  seinly tells pace.  --John Heywood   ^M$
+@end example
+
+@node Blank Lines, Case Folding, White Space, Comparison
+@section Suppressing Differences in Blank Lines
+@cindex blank line difference suppression
+
+The @samp{-B} and @samp{--ignore-blank-lines} options ignore insertions
+or deletions of blank lines.  These options normally affect only lines
+that are completely empty; they do not affect lines that look empty but
+contain space or tab characters.  With these options, for example, a
+file containing
+@example
+1.  A point is that which has no part.
+
+2.  A line is breadthless length.
+-- Euclid, The Elements, I
+@end example
+@noindent
+is considered identical to a file containing
+@example
+1.  A point is that which has no part.
+2.  A line is breadthless length.
+
+
+-- Euclid, The Elements, I
+@end example
+
+@node Case Folding, Specified Folding, Blank Lines, Comparison
+@section Suppressing Case Differences
+@cindex case difference suppression
+
+GNU @code{diff} can treat lowercase letters as equivalent to their
+uppercase counterparts, so that, for example, it considers @samp{Funky
+Stuff}, @samp{funky STUFF}, and @samp{fUNKy stuFf} to all be the same.
+To request this, use the @samp{-i} or @samp{--ignore-case} option.
+
+@node Specified Folding, Brief, Case Folding, Comparison
+@section Suppressing Lines Matching a Regular Expression
+@cindex regular expression suppression
+
+To ignore insertions and deletions of lines that match a regular
+expression, use the @samp{-I @var{regexp}} or
+@samp{--ignore-matching-lines=@var{regexp}} option.  You should escape
+regular expressions that contain shell metacharacters to prevent the
+shell from expanding them.  For example, @samp{diff -I '^[0-9]'} ignores
+all changes to lines beginning with a digit.
+
+However, @samp{-I} only ignores the insertion or deletion of lines that
+contain the regular expression if every changed line in the hunk---every
+insertion and every deletion---matches the regular expression.  In other
+words, for each nonignorable change, @code{diff} prints the complete set
+of changes in its vicinity, including the ignorable ones.
+
+You can specify more than one regular expression for lines to ignore by
+using more than one @samp{-I} option.  @code{diff} tries to match each
+line against each regular expression, starting with the last one given.
+
+@node Brief, Binary, Specified Folding, Comparison
+@section Summarizing Which Files Differ
+@cindex summarizing which files differ
+@cindex brief difference reports
+
+When you only want to find out whether files are different, and you
+don't care what the differences are, you can use the summary output
+format.  In this format, instead of showing the differences between the
+files, @code{diff} simply reports whether files differ.  The @samp{-q}
+and @samp{--brief} options select this output format.
+
+This format is especially useful when comparing the contents of two
+directories.  It is also much faster than doing the normal line by line
+comparisons, because @code{diff} can stop analyzing the files as soon as
+it knows that there are any differences.
+
+You can also get a brief indication of whether two files differ by using
+@code{cmp}.  For files that are identical, @code{cmp} produces no
+output.  When the files differ, by default, @code{cmp} outputs the byte
+offset and line number where the first difference occurs.  You can use
+the @samp{-s} option to suppress that information, so that @code{cmp}
+produces no output and reports whether the files differ using only its
+exit status (@pxref{Invoking cmp}).
+
+@c Fix this.
+Unlike @code{diff}, @code{cmp} cannot compare directories; it can only
+compare two files.
+
+@node Binary, , Brief, Comparison
+@section Binary Files and Forcing Text Comparisons
+@cindex binary file diff
+@cindex text versus binary diff
+
+If @code{diff} thinks that either of the two files it is comparing is
+binary (a non-text file), it normally treats that pair of files much as
+if the summary output format had been selected (@pxref{Brief}), and
+reports only that the binary files are different.  This is because line
+by line comparisons are usually not meaningful for binary files.
+
+@code{diff} determines whether a file is text or binary by checking the
+first few bytes in the file; the exact number of bytes is system
+dependent, but it is typically several thousand.  If every character in
+that part of the file is non-null, @code{diff} considers the file to be
+text; otherwise it considers the file to be binary.
+
+Sometimes you might want to force @code{diff} to consider files to be
+text.  For example, you might be comparing text files that contain
+null characters; @code{diff} would erroneously decide that those are
+non-text files.  Or you might be comparing documents that are in a
+format used by a word processing system that uses null characters to
+indicate special formatting.  You can force @code{diff} to consider all
+files to be text files, and compare them line by line, by using the
+@samp{-a} or @samp{--text} option.  If the files you compare using this
+option do not in fact contain text, they will probably contain few
+newline characters, and the @code{diff} output will consist of hunks
+showing differences between long lines of whatever characters the files
+contain.
+
+You can also force @code{diff} to consider all files to be binary files,
+and report only whether they differ (but not how).  Use the
+@samp{--brief} option for this.
+
+In operating systems that distinguish between text and binary files,
+@code{diff} normally reads and writes all data as text.  Use the
+@samp{--binary} option to force @code{diff} to read and write binary
+data instead.  This option has no effect on a Posix-compliant system
+like GNU or traditional Unix.  However, many personal computer
+operating systems represent the end of a line with a carriage return
+followed by a newline.  On such systems, @code{diff} normally ignores
+these carriage returns on input and generates them at the end of each
+output line, but with the @samp{--binary} option @code{diff} treats
+each carriage return as just another input character, and does not
+generate a carriage return at the end of each output line.  This can be
+useful when dealing with non-text files that are meant to be
+interchanged with Posix-compliant systems.
+
+If you want to compare two files byte by byte, you can use the
+@code{cmp} program with the @samp{-l} option to show the values of each
+differing byte in the two files.  With GNU @code{cmp}, you can also use
+the @samp{-c} option to show the ASCII representation of those bytes.
+@xref{Invoking cmp}, for more information.
+
+If @code{diff3} thinks that any of the files it is comparing is binary
+(a non-text file), it normally reports an error, because such
+comparisons are usually not useful.  @code{diff3} uses the same test as
+@code{diff} to decide whether a file is binary.  As with @code{diff}, if
+the input files contain a few non-text characters but otherwise are like
+text files, you can force @code{diff3} to consider all files to be text
+files and compare them line by line by using the @samp{-a} or
+@samp{--text} options.
+
+@node Output Formats, Comparing Directories, Comparison, Top
+@chapter @code{diff} Output Formats
+@cindex output formats
+@cindex format of @code{diff} output
+
+@code{diff} has several mutually exclusive options for output format.
+The following sections describe each format, illustrating how
+@code{diff} reports the differences between two sample input files.
+
+@menu
+* Sample diff Input::	Sample @code{diff} input files for examples.
+* Normal::		Showing differences without surrounding text.
+* Context::		Showing differences with the surrounding text.
+* Side by Side::        Showing differences in two columns.
+* Scripts::		Generating scripts for other programs.
+* If-then-else::	Merging files with if-then-else.
+@end menu
+
+@node Sample diff Input, Normal, , Output Formats
+@section Two Sample Input Files
+@cindex @code{diff} sample input
+@cindex sample input for @code{diff}
+
+Here are two sample files that we will use in numerous examples to
+illustrate the output of @code{diff} and how various options can change
+it.
+
+This is the file @file{lao}:
+
+@example
+The Way that can be told of is not the eternal Way;
+The name that can be named is not the eternal name.
+The Nameless is the origin of Heaven and Earth;
+The Named is the mother of all things.
+Therefore let there always be non-being,
+  so we may see their subtlety,
+And let there always be being,
+  so we may see their outcome.
+The two are the same,
+But after they are produced,
+  they have different names.
+@end example
+
+This is the file @file{tzu}:
+
+@example
+The Nameless is the origin of Heaven and Earth;
+The named is the mother of all things.
+
+Therefore let there always be non-being,
+  so we may see their subtlety,
+And let there always be being,
+  so we may see their outcome.
+The two are the same,
+But after they are produced,
+  they have different names.
+They both may be called deep and profound.
+Deeper and more profound,
+The door of all subtleties!
+@end example
+
+In this example, the first hunk contains just the first two lines of
+@file{lao}, the second hunk contains the fourth line of @file{lao}
+opposing the second and third lines of @file{tzu}, and the last hunk
+contains just the last three lines of @file{tzu}.
+
+@node Normal, Context, Sample diff Input, Output Formats
+@section Showing Differences Without Context
+@cindex normal output format
+@cindex @samp{<} output format
+
+The ``normal'' @code{diff} output format shows each hunk of differences
+without any surrounding context.  Sometimes such output is the clearest
+way to see how lines have changed, without the clutter of nearby
+unchanged lines (although you can get similar results with the context
+or unified formats by using 0 lines of context).  However, this format
+is no longer widely used for sending out patches; for that purpose, the
+context format (@pxref{Context Format}) and the unified format
+(@pxref{Unified Format}) are superior.  Normal format is the default for
+compatibility with older versions of @code{diff} and the Posix standard.
+
+@menu
+* Detailed Normal::	A detailed description of normal output format.
+* Example Normal::	Sample output in the normal format.
+@end menu
+
+@node Detailed Normal, Example Normal, , Normal
+@subsection Detailed Description of Normal Format
+
+The normal output format consists of one or more hunks of differences;
+each hunk shows one area where the files differ.  Normal format hunks
+look like this:
+
+@example
+@var{change-command}
+< @var{from-file-line}
+< @var{from-file-line}@dots{}
+---
+> @var{to-file-line}
+> @var{to-file-line}@dots{}
+@end example
+
+There are three types of change commands.  Each consists of a line
+number or comma-separated range of lines in the first file, a single
+character indicating the kind of change to make, and a line number or
+comma-separated range of lines in the second file.  All line numbers are
+the original line numbers in each file.  The types of change commands
+are:
+
+@table @samp
+@item @var{l}a@var{r}
+Add the lines in range @var{r} of the second file after line @var{l} of
+the first file.  For example, @samp{8a12,15} means append lines 12--15
+of file 2 after line 8 of file 1; or, if changing file 2 into file 1,
+delete lines 12--15 of file 2.
+
+@item @var{f}c@var{t}
+Replace the lines in range @var{f} of the first file with lines in range
+@var{t} of the second file.  This is like a combined add and delete, but
+more compact.  For example, @samp{5,7c8,10} means change lines 5--7 of
+file 1 to read as lines 8--10 of file 2; or, if changing file 2 into
+file 1, change lines 8--10 of file 2 to read as lines 5--7 of file 1.
+
+@item @var{r}d@var{l}
+Delete the lines in range @var{r} from the first file; line @var{l} is where
+they would have appeared in the second file had they not been deleted.
+For example, @samp{5,7d3} means delete lines 5--7 of file 1; or, if
+changing file 2 into file 1, append lines 5--7 of file 1 after line 3 of
+file 2.
+@end table
+
+@node Example Normal, , Detailed Normal, Normal
+@subsection An Example of Normal Format
+
+Here is the output of the command @samp{diff lao tzu}
+(@pxref{Sample diff Input}, for the complete contents of the two files).
+Notice that it shows only the lines that are different between the two
+files.
+
+@example
+1,2d0
+< The Way that can be told of is not the eternal Way;
+< The name that can be named is not the eternal name.
+4c2,3
+< The Named is the mother of all things.
+---
+> The named is the mother of all things.
+> 
+11a11,13
+> They both may be called deep and profound.
+> Deeper and more profound,
+> The door of all subtleties!
+@end example
+
+@node Context, Side by Side, Normal, Output Formats
+@section Showing Differences in Their Context
+@cindex context output format
+@cindex @samp{!} output format
+
+Usually, when you are looking at the differences between files, you will
+also want to see the parts of the files near the lines that differ, to
+help you understand exactly what has changed.  These nearby parts of the
+files are called the @dfn{context}.
+
+GNU @code{diff} provides two output formats that show context around the
+differing lines: @dfn{context format} and @dfn{unified format}.  It can
+optionally show in which function or section of the file the differing
+lines are found.
+
+If you are distributing new versions of files to other people in the
+form of @code{diff} output, you should use one of the output formats
+that show context so that they can apply the diffs even if they have
+made small changes of their own to the files.  @code{patch} can apply
+the diffs in this case by searching in the files for the lines of
+context around the differing lines; if those lines are actually a few
+lines away from where the diff says they are, @code{patch} can adjust
+the line numbers accordingly and still apply the diff correctly.
+@xref{Imperfect}, for more information on using @code{patch} to apply
+imperfect diffs.
+
+@menu
+* Context Format::	An output format that shows surrounding lines.
+* Unified Format::	A more compact output format that shows context.
+* Sections::		Showing which sections of the files differences are in.
+* Alternate Names::	Showing alternate file names in context headers.
+@end menu
+
+@node Context Format, Unified Format, , Context
+@subsection Context Format
+
+The context output format shows several lines of context around the
+lines that differ.  It is the standard format for distributing updates
+to source code.
+
+To select this output format, use the @samp{-C @var{lines}},
+@samp{--context@r{[}=@var{lines}@r{]}}, or @samp{-c} option.  The
+argument @var{lines} that some of these options take is the number of
+lines of context to show.  If you do not specify @var{lines}, it
+defaults to three.  For proper operation, @code{patch} typically needs
+at least two lines of context.
+
+@menu
+* Detailed Context::	A detailed description of the context output format.
+* Example Context::	Sample output in context format.
+* Less Context::	Another sample with less context.
+@end menu
+
+@node Detailed Context, Example Context, , Context Format
+@subsubsection Detailed Description of Context Format
+
+The context output format starts with a two-line header, which looks
+like this:
+
+@example
+*** @var{from-file} @var{from-file-modification-time}
+--- @var{to-file} @var{to-file-modification-time}
+@end example
+
+@noindent
+You can change the header's content with the @samp{-L @var{label}} or
+@samp{--label=@var{label}} option; see @ref{Alternate Names}.
+
+Next come one or more hunks of differences; each hunk shows one area
+where the files differ.  Context format hunks look like this:
+
+@example
+***************
+*** @var{from-file-line-range} ****
+  @var{from-file-line}
+  @var{from-file-line}@dots{}
+--- @var{to-file-line-range} ----
+  @var{to-file-line}
+  @var{to-file-line}@dots{}
+@end example
+
+The lines of context around the lines that differ start with two space
+characters.  The lines that differ between the two files start with one
+of the following indicator characters, followed by a space character:
+
+@table @samp
+@item !
+A line that is part of a group of one or more lines that changed between
+the two files.  There is a corresponding group of lines marked with
+@samp{!} in the part of this hunk for the other file.
+
+@item +
+An ``inserted'' line in the second file that corresponds to nothing in
+the first file.
+
+@item -
+A ``deleted'' line in the first file that corresponds to nothing in the
+second file.
+@end table
+
+If all of the changes in a hunk are insertions, the lines of
+@var{from-file} are omitted.  If all of the changes are deletions, the
+lines of @var{to-file} are omitted.
+
+@node Example Context, Less Context, Detailed Context, Context Format
+@subsubsection An Example of Context Format
+
+Here is the output of @samp{diff -c lao tzu} (@pxref{Sample diff Input},
+for the complete contents of the two files).  Notice that up to three
+lines that are not different are shown around each line that is
+different; they are the context lines.  Also notice that the first two
+hunks have run together, because their contents overlap.
+
+@example
+*** lao	Sat Jan 26 23:30:39 1991
+--- tzu	Sat Jan 26 23:30:50 1991
+***************
+*** 1,7 ****
+- The Way that can be told of is not the eternal Way;
+- The name that can be named is not the eternal name.
+  The Nameless is the origin of Heaven and Earth;
+! The Named is the mother of all things.
+  Therefore let there always be non-being,
+    so we may see their subtlety,
+  And let there always be being,
+--- 1,6 ----
+  The Nameless is the origin of Heaven and Earth;
+! The named is the mother of all things.
+! 
+  Therefore let there always be non-being,
+    so we may see their subtlety,
+  And let there always be being,
+***************
+*** 9,11 ****
+--- 8,13 ----
+  The two are the same,
+  But after they are produced,
+    they have different names.
++ They both may be called deep and profound.
++ Deeper and more profound,
++ The door of all subtleties!
+@end example
+
+@node Less Context, , Example Context, Context Format
+@subsubsection An Example of Context Format with Less Context
+
+Here is the output of @samp{diff --context=1 lao tzu} (@pxref{Sample
+diff Input}, for the complete contents of the two files).  Notice that
+at most one context line is reported here.
+
+@example
+*** lao	Sat Jan 26 23:30:39 1991
+--- tzu	Sat Jan 26 23:30:50 1991
+***************
+*** 1,5 ****
+- The Way that can be told of is not the eternal Way;
+- The name that can be named is not the eternal name.
+  The Nameless is the origin of Heaven and Earth;
+! The Named is the mother of all things.
+  Therefore let there always be non-being,
+--- 1,4 ----
+  The Nameless is the origin of Heaven and Earth;
+! The named is the mother of all things.
+! 
+  Therefore let there always be non-being,
+***************
+*** 11 ****
+--- 10,13 ----
+    they have different names.
++ They both may be called deep and profound.
++ Deeper and more profound,
++ The door of all subtleties!
+@end example
+
+@node Unified Format, Sections, Context Format, Context
+@subsection Unified Format
+@cindex unified output format
+@cindex @samp{+-} output format
+
+The unified output format is a variation on the context format that is
+more compact because it omits redundant context lines.  To select this
+output format, use the @samp{-U @var{lines}},
+@samp{--unified@r{[}=@var{lines}@r{]}}, or @samp{-u}
+option.  The argument @var{lines} is the number of lines of context to
+show.  When it is not given, it defaults to three.
+
+At present, only GNU @code{diff} can produce this format and only GNU
+@code{patch} can automatically apply diffs in this format.  For proper
+operation, @code{patch} typically needs at least two lines of context.
+
+@menu
+* Detailed Unified::	A detailed description of unified format.
+* Example Unified::	Sample output in unified format.
+@end menu
+
+@node Detailed Unified, Example Unified, , Unified Format
+@subsubsection Detailed Description of Unified Format
+
+The unified output format starts with a two-line header, which looks
+like this:
+
+@example
+--- @var{from-file} @var{from-file-modification-time}
++++ @var{to-file} @var{to-file-modification-time}
+@end example
+
+@noindent
+You can change the header's content with the @samp{-L @var{label}} or
+@samp{--label=@var{label}} option; see @ref{Alternate Names}.
+
+Next come one or more hunks of differences; each hunk shows one area
+where the files differ.  Unified format hunks look like this:
+
+@example
+@@@@ @var{from-file-range} @var{to-file-range} @@@@
+ @var{line-from-either-file}
+ @var{line-from-either-file}@dots{}
+@end example
+
+The lines common to both files begin with a space character.  The lines
+that actually differ between the two files have one of the following
+indicator characters in the left column:
+
+@table @samp
+@item +
+A line was added here to the first file.
+
+@item -
+A line was removed here from the first file.
+@end table
+
+@node Example Unified, , Detailed Unified, Unified Format
+@subsubsection An Example of Unified Format
+
+Here is the output of the command @samp{diff -u lao tzu}
+(@pxref{Sample diff Input}, for the complete contents of the two files):
+
+@example
+--- lao	Sat Jan 26 23:30:39 1991
++++ tzu	Sat Jan 26 23:30:50 1991
+@@@@ -1,7 +1,6 @@@@
+-The Way that can be told of is not the eternal Way;
+-The name that can be named is not the eternal name.
+ The Nameless is the origin of Heaven and Earth;
+-The Named is the mother of all things.
++The named is the mother of all things.
++
+ Therefore let there always be non-being,
+   so we may see their subtlety,
+ And let there always be being,
+@@@@ -9,3 +8,6 @@@@
+ The two are the same,
+ But after they are produced,
+   they have different names.
++They both may be called deep and profound.
++Deeper and more profound,
++The door of all subtleties!
+@end example
+
+@node Sections, Alternate Names, Unified Format, Context
+@subsection Showing Which Sections Differences Are in
+@cindex headings
+@cindex section headings
+
+Sometimes you might want to know which part of the files each change
+falls in.  If the files are source code, this could mean which function
+was changed.  If the files are documents, it could mean which chapter or
+appendix was changed.  GNU @code{diff} can show this by displaying the
+nearest section heading line that precedes the differing lines.  Which
+lines are ``section headings'' is determined by a regular expression.
+
+@menu
+* Specified Headings::	Showing headings that match regular expressions.
+* C Function Headings::	Showing headings of C functions.
+@end menu
+
+@node Specified Headings, C Function Headings, , Sections
+@subsubsection Showing Lines That Match Regular Expressions
+@cindex specified headings
+@cindex regular expression matching headings
+
+To show in which sections differences occur for files that are not
+source code for C or similar languages, use the @samp{-F @var{regexp}}
+or @samp{--show-function-line=@var{regexp}} option.  @code{diff}
+considers lines that match the argument @var{regexp} to be the beginning
+of a section of the file.  Here are suggested regular expressions for
+some common languages:
+
+@c Please add to this list, e.g. Fortran, Pascal.
+@table @samp
+@item ^[A-Za-z_]
+C, C++, Prolog
+@item ^(
+Lisp
+@item ^@@\(chapter\|appendix\|unnumbered\|chapheading\)
+Texinfo
+@end table
+
+This option does not automatically select an output format; in order to
+use it, you must select the context format (@pxref{Context Format}) or
+unified format (@pxref{Unified Format}).  In other output formats it
+has no effect.
+
+The @samp{-F} and @samp{--show-function-line} options find the nearest
+unchanged line that precedes each hunk of differences and matches the
+given regular expression.  Then they add that line to the end of the
+line of asterisks in the context format, or to the @samp{@@@@} line in
+unified format.  If no matching line exists, they leave the output for
+that hunk unchanged.  If that line is more than 40 characters long, they
+output only the first 40 characters.  You can specify more than one
+regular expression for such lines; @code{diff} tries to match each line
+against each regular expression, starting with the last one given.  This
+means that you can use @samp{-p} and @samp{-F} together, if you wish.
+
+@node C Function Headings, , Specified Headings, Sections
+@subsubsection Showing C Function Headings
+@cindex C function headings
+@cindex function headings, C
+
+To show in which functions differences occur for C and similar
+languages, you can use the @samp{-p} or @samp{--show-c-function} option.
+This option automatically defaults to the context output format
+(@pxref{Context Format}), with the default number of lines of context.
+You can override that number with @samp{-C @var{lines}} elsewhere in the
+command line.  You can override both the format and the number with
+@samp{-U @var{lines}} elsewhere in the command line.
+
+The @samp{-p} and @samp{--show-c-function} options are equivalent to
+@samp{-F'^[_a-zA-Z$]'} if the unified format is specified, otherwise
+@samp{-c -F'^[_a-zA-Z$]'} (@pxref{Specified Headings}).  GNU @code{diff}
+provides them for the sake of convenience.
+
+@node Alternate Names, , Sections, Context
+@subsection Showing Alternate File Names
+@cindex alternate file names
+@cindex file name alternates
+
+If you are comparing two files that have meaningless or uninformative
+names, you might want @code{diff} to show alternate names in the header
+of the context and unified output formats.  To do this, use the @samp{-L
+@var{label}} or @samp{--label=@var{label}} option.  The first time
+you give this option, its argument replaces the name and date of the
+first file in the header; the second time, its argument replaces the
+name and date of the second file.  If you give this option more than
+twice, @code{diff} reports an error.  The @samp{-L} option does not
+affect the file names in the @code{pr} header when the @samp{-l} or
+@samp{--paginate} option is used (@pxref{Pagination}).
+
+Here are the first two lines of the output from @samp{diff -C2
+-Loriginal -Lmodified lao tzu}:
+
+@example
+*** original
+--- modified
+@end example
+
+@node Side by Side, Scripts, Context, Output Formats
+@section Showing Differences Side by Side
+@cindex side by side
+@cindex two-column output
+@cindex columnar output
+
+@code{diff} can produce a side by side difference listing of two files.
+The files are listed in two columns with a gutter between them.  The
+gutter contains one of the following markers:
+
+@table @asis
+@item white space
+The corresponding lines are in common.  That is, either the lines are
+identical, or the difference is ignored because of one of the
+@samp{--ignore} options (@pxref{White Space}).
+
+@item @samp{|}
+The corresponding lines differ, and they are either both complete
+or both incomplete.
+
+@item @samp{<}
+The files differ and only the first file contains the line.
+
+@item @samp{>}
+The files differ and only the second file contains the line.
+
+@item @samp{(}
+Only the first file contains the line, but the difference is ignored.
+
+@item @samp{)}
+Only the second file contains the line, but the difference is ignored.
+
+@item @samp{\}
+The corresponding lines differ, and only the first line is incomplete.
+
+@item @samp{/}
+The corresponding lines differ, and only the second line is incomplete.
+@end table
+
+Normally, an output line is incomplete if and only if the lines that it
+contains are incomplete; see @ref{Incomplete Lines}.  However, when an
+output line represents two differing lines, one might be incomplete
+while the other is not.  In this case, the output line is complete,
+but its gutter is marked @samp{\} if the first line is incomplete,
+@samp{/} if the second line is.
+
+Side by side format is sometimes easiest to read, but it has limitations.
+It generates much wider output than usual, and truncates lines that are
+too long to fit.  Also, it relies on lining up output more heavily than
+usual, so its output looks particularly bad if you use varying
+width fonts, nonstandard tab stops, or nonprinting characters.
+
+You can use the @code{sdiff} command to interactively merge side by side
+differences.  @xref{Interactive Merging}, for more information on merging files.
+
+@menu
+* Side by Side Format::		Controlling side by side output format.
+* Example Side by Side::	Sample side by side output.
+@end menu
+
+@node Side by Side Format, Example Side by Side, , Side by Side
+@subsection Controlling Side by Side Format
+@cindex side by side format
+
+The @samp{-y} or @samp{--side-by-side} option selects side by side
+format.  Because side by side output lines contain two input lines, they
+are wider than usual.  They are normally 130 columns, which can fit onto
+a traditional printer line.  You can set the length of output lines with
+the @samp{-W @var{columns}} or @samp{--width=@var{columns}} option.  The
+output line is split into two halves of equal length, separated by a
+small gutter to mark differences; the right half is aligned to a tab
+stop so that tabs line up.  Input lines that are too long to fit in half
+of an output line are truncated for output.
+
+The @samp{--left-column} option prints only the left column of two
+common lines.  The @samp{--suppress-common-lines} option suppresses
+common lines entirely.
+
+@node Example Side by Side, , Side by Side Format, Side by Side
+@subsection An Example of Side by Side Format
+
+Here is the output of the command @samp{diff -y -W 72 lao tzu}
+(@pxref{Sample diff Input}, for the complete contents of the two files).
+
+@example
+The Way that can be told of is n   <
+The name that can be named is no   <
+The Nameless is the origin of He        The Nameless is the origin of He
+The Named is the mother of all t   |    The named is the mother of all t
+                                   >
+Therefore let there always be no        Therefore let there always be no
+  so we may see their subtlety,           so we may see their subtlety,
+And let there always be being,          And let there always be being,
+  so we may see their outcome.            so we may see their outcome.
+The two are the same,                   The two are the same,
+But after they are produced,            But after they are produced,
+  they have different names.              they have different names.
+                                   >    They both may be called deep and
+                                   >    Deeper and more profound,
+                                   >    The door of all subtleties!
+@end example
+
+@node Scripts, If-then-else, Side by Side, Output Formats
+@section Making Edit Scripts
+@cindex script output formats
+
+Several output modes produce command scripts for editing @var{from-file}
+to produce @var{to-file}.
+
+@menu
+* ed Scripts::		Using @code{diff} to produce commands for @code{ed}.
+* Forward ed::		Making forward @code{ed} scripts.
+* RCS::			A special @code{diff} output format used by RCS.
+@end menu
+
+@node ed Scripts, Forward ed, , Scripts
+@subsection @code{ed} Scripts
+@cindex @code{ed} script output format
+
+@code{diff} can produce commands that direct the @code{ed} text editor
+to change the first file into the second file.  Long ago, this was the
+only output mode that was suitable for editing one file into another
+automatically; today, with @code{patch}, it is almost obsolete.  Use the
+@samp{-e} or @samp{--ed} option to select this output format.
+
+Like the normal format (@pxref{Normal}), this output format does not
+show any context; unlike the normal format, it does not include the
+information necessary to apply the diff in reverse (to produce the first
+file if all you have is the second file and the diff).
+
+If the file @file{d} contains the output of @samp{diff -e old new}, then
+the command @samp{(cat d && echo w) | ed - old} edits @file{old} to make
+it a copy of @file{new}.  More generally, if @file{d1}, @file{d2},
+@dots{}, @file{dN} contain the outputs of @samp{diff -e old new1},
+@samp{diff -e new1 new2}, @dots{}, @samp{diff -e newN-1 newN},
+respectively, then the command @samp{(cat d1 d2 @dots{} dN && echo w) |
+ed - old} edits @file{old} to make it a copy of @file{newN}.
+
+@menu
+* Detailed ed::		A detailed description of @code{ed} format.
+* Example ed::		A sample @code{ed} script.
+@end menu
+
+@node Detailed ed, Example ed, , ed Scripts
+@subsubsection Detailed Description of @code{ed} Format
+
+The @code{ed} output format consists of one or more hunks of
+differences.  The changes closest to the ends of the files come first so
+that commands that change the number of lines do not affect how
+@code{ed} interprets line numbers in succeeding commands.  @code{ed}
+format hunks look like this:
+
+@example
+@var{change-command}
+@var{to-file-line}
+@var{to-file-line}@dots{}
+.
+@end example
+
+Because @code{ed} uses a single period on a line to indicate the end of
+input, GNU @code{diff} protects lines of changes that contain a single
+period on a line by writing two periods instead, then writing a
+subsequent @code{ed} command to change the two periods into one.  The
+@code{ed} format cannot represent an incomplete line, so if the second
+file ends in a changed incomplete line, @code{diff} reports an error and
+then pretends that a newline was appended.
+
+There are three types of change commands.  Each consists of a line
+number or comma-separated range of lines in the first file and a single
+character indicating the kind of change to make.  All line numbers are
+the original line numbers in the file.  The types of change commands
+are:
+
+@table @samp
+@item @var{l}a
+Add text from the second file after line @var{l} in the first file.  For
+example, @samp{8a} means to add the following lines after line 8 of file
+1.
+
+@item @var{r}c
+Replace the lines in range @var{r} in the first file with the following
+lines.  Like a combined add and delete, but more compact.  For example,
+@samp{5,7c} means change lines 5--7 of file 1 to read as the text that
+follows, which comes from file 2.
+
+@item @var{r}d
+Delete the lines in range @var{r} from the first file.  For example,
+@samp{5,7d} means delete lines 5--7 of file 1.
+@end table
+
+@node Example ed, , Detailed ed, ed Scripts
+@subsubsection Example @code{ed} Script
+
+Here is the output of @samp{diff -e lao tzu} (@pxref{Sample
+diff Input}, for the complete contents of the two files):
+
+@example
+11a
+They both may be called deep and profound.
+Deeper and more profound,
+The door of all subtleties!
+.
+4c
+The named is the mother of all things.
+
+.
+1,2d
+@end example
+
+@node Forward ed, RCS, ed Scripts, Scripts
+@subsection Forward @code{ed} Scripts
+@cindex forward @code{ed} script output format
+
+@code{diff} can produce output that is like an @code{ed} script, but
+with hunks in forward (front to back) order.  The format of the commands
+is also changed slightly: command characters precede the lines they
+modify, spaces separate line numbers in ranges, and no attempt is made
+to disambiguate hunk lines consisting of a single period.  Like
+@code{ed} format, forward @code{ed} format cannot represent incomplete
+lines.
+
+Forward @code{ed} format is not very useful, because neither @code{ed}
+nor @code{patch} can apply diffs in this format.  It exists mainly for
+compatibility with older versions of @code{diff}.  Use the @samp{-f} or
+@samp{--forward-ed} option to select it.
+
+@node RCS, , Forward ed, Scripts
+@subsection RCS Scripts
+@cindex RCS script output format
+
+The RCS output format is designed specifically for use by the Revision
+Control System, which is a set of free programs used for organizing
+different versions and systems of files.  Use the @samp{-n} or
+@samp{--rcs} option to select this output format.  It is like the
+forward @code{ed} format (@pxref{Forward ed}), but it can represent
+arbitrary changes to the contents of a file because it avoids the
+forward @code{ed} format's problems with lines consisting of a single
+period and with incomplete lines.  Instead of ending text sections with
+a line consisting of a single period, each command specifies the number
+of lines it affects; a combination of the @samp{a} and @samp{d}
+commands is used instead of @samp{c}.  Also, if the second file ends
+in a changed incomplete line, then the output also ends in an
+incomplete line.
+
+Here is the output of @samp{diff -n lao tzu} (@pxref{Sample
+diff Input}, for the complete contents of the two files):
+
+@example
+d1 2
+d4 1
+a4 2
+The named is the mother of all things.
+
+a11 3
+They both may be called deep and profound.
+Deeper and more profound,
+The door of all subtleties!
+@end example
+
+@node If-then-else, , Scripts, Output Formats
+@section Merging Files with If-then-else
+@cindex merged output format
+@cindex if-then-else output format
+@cindex C if-then-else output format
+@cindex @code{ifdef} output format
+
+You can use @code{diff} to merge two files of C source code.  The output
+of @code{diff} in this format contains all the lines of both files.
+Lines common to both files are output just once; the differing parts are
+separated by the C preprocessor directives @code{#ifdef @var{name}} or
+@code{#ifndef @var{name}}, @code{#else}, and @code{#endif}.  When
+compiling the output, you select which version to use by either defining
+or leaving undefined the macro @var{name}.
+
+To merge two files, use @code{diff} with the @samp{-D @var{name}} or
+@samp{--ifdef=@var{name}} option.  The argument @var{name} is the C
+preprocessor identifier to use in the @code{#ifdef} and @code{#ifndef}
+directives.
+
+For example, if you change an instance of @code{wait (&s)} to
+@code{waitpid (-1, &s, 0)} and then merge the old and new files with
+the @samp{--ifdef=HAVE_WAITPID} option, then the affected part of your code
+might look like this:
+
+@example
+    do @{
+#ifndef HAVE_WAITPID
+        if ((w = wait (&s)) < 0  &&  errno != EINTR)
+#else /* HAVE_WAITPID */
+        if ((w = waitpid (-1, &s, 0)) < 0  &&  errno != EINTR)
+#endif /* HAVE_WAITPID */
+            return w;
+    @} while (w != child);
+@end example
+
+You can specify formats for languages other than C by using line group
+formats and line formats, as described in the next sections.
+
+@menu
+* Line Group Formats::		Formats for general if-then-else line groups.
+* Line Formats::		Formats for each line in a line group.
+* Detailed If-then-else::	A detailed description of if-then-else format.
+* Example If-then-else::	Sample if-then-else format output.
+@end menu
+
+@node Line Group Formats, Line Formats, , If-then-else
+@subsection Line Group Formats
+@cindex line group formats
+@cindex formats for if-then-else line groups
+
+Line group formats let you specify formats suitable for many
+applications that allow if-then-else input, including programming
+languages and text formatting languages.  A line group format specifies
+the output format for a contiguous group of similar lines.
+
+For example, the following command compares the TeX files @file{old}
+and @file{new}, and outputs a merged file in which old regions are
+surrounded by @samp{\begin@{em@}}-@samp{\end@{em@}} lines, and new
+regions are surrounded by @samp{\begin@{bf@}}-@samp{\end@{bf@}} lines.
+
+@example
+diff \
+   --old-group-format='\begin@{em@}
+%<\end@{em@}
+' \
+   --new-group-format='\begin@{bf@}
+%>\end@{bf@}
+' \
+   old new
+@end example
+
+The following command is equivalent to the above example, but it is a
+little more verbose, because it spells out the default line group formats.
+
+@example
+diff \
+   --old-group-format='\begin@{em@}
+%<\end@{em@}
+' \
+   --new-group-format='\begin@{bf@}
+%>\end@{bf@}
+' \
+   --unchanged-group-format='%=' \
+   --changed-group-format='\begin@{em@}
+%<\end@{em@}
+\begin@{bf@}
+%>\end@{bf@}
+' \
+   old new
+@end example
+
+Here is a more advanced example, which outputs a diff listing with
+headers containing line numbers in a ``plain English'' style.
+
+@example
+diff \
+   --unchanged-group-format='' \
+   --old-group-format='-------- %dn line%(n=1?:s) deleted at %df:
+%<' \
+   --new-group-format='-------- %dN line%(N=1?:s) added after %de:
+%>' \
+   --changed-group-format='-------- %dn line%(n=1?:s) changed at %df:
+%<-------- to:
+%>' \
+   old new
+@end example
+
+To specify a line group format, use @code{diff} with one of the options
+listed below.  You can specify up to four line group formats, one for
+each kind of line group.  You should quote @var{format}, because it
+typically contains shell metacharacters.
+
+@table @samp
+@item --old-group-format=@var{format}
+These line groups are hunks containing only lines from the first file.
+The default old group format is the same as the changed group format if
+it is specified; otherwise it is a format that outputs the line group as-is.
+
+@item --new-group-format=@var{format}
+These line groups are hunks containing only lines from the second
+file.  The default new group format is the same as the changed group
+format if it is specified; otherwise it is a format that outputs the
+line group as-is.
+
+@item --changed-group-format=@var{format}
+These line groups are hunks containing lines from both files.  The
+default changed group format is the concatenation of the old and new
+group formats.
+
+@item --unchanged-group-format=@var{format}
+These line groups contain lines common to both files.  The default
+unchanged group format is a format that outputs the line group as-is.
+@end table
+
+In a line group format, ordinary characters represent themselves;
+conversion specifications start with @samp{%} and have one of the
+following forms.
+
+@table @samp
+@item %<
+stands for the lines from the first file, including the trailing newline.
+Each line is formatted according to the old line format (@pxref{Line Formats}).
+
+@item %>
+stands for the lines from the second file, including the trailing newline.
+Each line is formatted according to the new line format.
+
+@item %=
+stands for the lines common to both files, including the trailing newline.
+Each line is formatted according to the unchanged line format.
+
+@item %%
+stands for @samp{%}.
+
+@item %c'@var{C}'
+where @var{C} is a single character, stands for @var{C}.
+@var{C} may not be a backslash or an apostrophe.
+For example, @samp{%c':'} stands for a colon, even inside
+the then-part of an if-then-else format, which a colon would
+normally terminate.
+
+@item %c'\@var{O}'
+where @var{O} is a string of 1, 2, or 3 octal digits,
+stands for the character with octal code @var{O}.
+For example, @samp{%c'\0'} stands for a null character.
+
+@item @var{F}@var{n}
+where @var{F} is a @code{printf} conversion specification and @var{n} is one
+of the following letters, stands for @var{n}'s value formatted with @var{F}.
+
+@table @samp
+@item e
+The line number of the line just before the group in the old file.
+
+@item f
+The line number of the first line in the group in the old file;
+equals @var{e} + 1.
+
+@item l
+The line number of the last line in the group in the old file.
+
+@item m
+The line number of the line just after the group in the old file;
+equals @var{l} + 1.
+
+@item n
+The number of lines in the group in the old file; equals @var{l} - @var{f} + 1.
+
+@item E, F, L, M, N
+Likewise, for lines in the new file.
+
+@end table
+
+The @code{printf} conversion specification can be @samp{%d},
+@samp{%o}, @samp{%x}, or @samp{%X}, specifying decimal, octal,
+lower case hexadecimal, or upper case hexadecimal output
+respectively.  After the @samp{%} the following options can appear in
+sequence: a @samp{-} specifying left-justification; an integer
+specifying the minimum field width; and a period followed by an
+optional integer specifying the minimum number of digits.
+For example, @samp{%5dN} prints the number of new lines in the group
+in a field of width 5 characters, using the @code{printf} format @code{"%5d"}.
+
+@item (@var{A}=@var{B}?@var{T}:@var{E})
+If @var{A} equals @var{B} then @var{T} else @var{E}.
+@var{A} and @var{B} are each either a decimal constant
+or a single letter interpreted as above.
+This format spec is equivalent to @var{T} if
+@var{A}'s value equals @var{B}'s; otherwise it is equivalent to @var{E}.
+
+For example, @samp{%(N=0?no:%dN) line%(N=1?:s)} is equivalent to
+@samp{no lines} if @var{N} (the number of lines in the group in the
+new file) is 0, to @samp{1 line} if @var{N} is 1, and to @samp{%dN lines}
+otherwise.
+@end table
+
+@node Line Formats, Detailed If-then-else, Line Group Formats, If-then-else
+@subsection Line Formats
+@cindex line formats
+
+Line formats control how each line taken from an input file is
+output as part of a line group in if-then-else format.
+
+For example, the following command outputs text with a one-column
+change indicator to the left of the text.  The first column of output
+is @samp{-} for deleted lines, @samp{|} for added lines, and a space
+for unchanged lines.  The formats contain newline characters where
+newlines are desired on output.
+
+@example
+diff \
+   --old-line-format='-%l
+' \
+   --new-line-format='|%l
+' \
+   --unchanged-line-format=' %l
+' \
+   old new
+@end example
+
+To specify a line format, use one of the following options.  You should
+quote @var{format}, since it often contains shell metacharacters.
+
+@table @samp
+@item --old-line-format=@var{format}
+formats lines just from the first file.
+
+@item --new-line-format=@var{format}
+formats lines just from the second file.
+
+@item --unchanged-line-format=@var{format}
+formats lines common to both files.
+
+@item --line-format=@var{format}
+formats all lines; in effect, it sets all three above options simultaneously.
+@end table
+
+In a line format, ordinary characters represent themselves;
+conversion specifications start with @samp{%} and have one of the
+following forms.
+
+@table @samp
+@item %l
+stands for the contents of the line, not counting its trailing
+newline (if any).  This format ignores whether the line is incomplete;
+@xref{Incomplete Lines}.
+
+@item %L
+stands for the contents of the line, including its trailing newline
+(if any).  If a line is incomplete, this format preserves its
+incompleteness.
+
+@item %%
+stands for @samp{%}.
+
+@item %c'@var{C}'
+where @var{C} is a single character, stands for @var{C}.
+@var{C} may not be a backslash or an apostrophe.
+For example, @samp{%c':'} stands for a colon.
+
+@item %c'\@var{O}'
+where @var{O} is a string of 1, 2, or 3 octal digits,
+stands for the character with octal code @var{O}.
+For example, @samp{%c'\0'} stands for a null character.
+
+@item @var{F}n
+where @var{F} is a @code{printf} conversion specification,
+stands for the line number formatted with @var{F}.
+For example, @samp{%.5dn} prints the line number using the
+@code{printf} format @code{"%.5d"}.  @xref{Line Group Formats}, for
+more about printf conversion specifications.
+
+@end table
+
+The default line format is @samp{%l} followed by a newline character.
+
+If the input contains tab characters and it is important that they line
+up on output, you should ensure that @samp{%l} or @samp{%L} in a line
+format is just after a tab stop (e.g.@: by preceding @samp{%l} or
+@samp{%L} with a tab character), or you should use the @samp{-t} or
+@samp{--expand-tabs} option.
+
+Taken together, the line and line group formats let you specify many
+different formats.  For example, the following command uses a format
+similar to @code{diff}'s normal format.  You can tailor this command
+to get fine control over @code{diff}'s output.
+
+@example
+diff \
+   --old-line-format='< %l
+' \
+   --new-line-format='> %l
+' \
+   --old-group-format='%df%(f=l?:,%dl)d%dE
+%<' \
+   --new-group-format='%dea%dF%(F=L?:,%dL)
+%>' \
+   --changed-group-format='%df%(f=l?:,%dl)c%dF%(F=L?:,%dL)
+%<---
+%>' \
+   --unchanged-group-format='' \
+   old new
+@end example
+
+@node Detailed If-then-else, Example If-then-else, Line Formats, If-then-else
+@subsection Detailed Description of If-then-else Format
+
+For lines common to both files, @code{diff} uses the unchanged line
+group format.  For each hunk of differences in the merged output
+format, if the hunk contains only lines from the first file,
+@code{diff} uses the old line group format; if the hunk contains only
+lines from the second file, @code{diff} uses the new group format;
+otherwise, @code{diff} uses the changed group format.
+
+The old, new, and unchanged line formats specify the output format of
+lines from the first file, lines from the second file, and lines common
+to both files, respectively.
+
+The option @samp{--ifdef=@var{name}} is equivalent to
+the following sequence of options using shell syntax:
+
+@example
+--old-group-format='#ifndef @var{name}
+%<#endif /* not @var{name} */
+' \
+--new-group-format='#ifdef @var{name}
+%>#endif /* @var{name} */
+' \
+--unchanged-group-format='%=' \
+--changed-group-format='#ifndef @var{name}
+%<#else /* @var{name} */
+%>#endif /* @var{name} */
+'
+@end example
+
+You should carefully check the @code{diff} output for proper nesting.
+For example, when using the @samp{-D @var{name}} or
+@samp{--ifdef=@var{name}} option, you should check that if the
+differing lines contain any of the C preprocessor directives
+@samp{#ifdef}, @samp{#ifndef}, @samp{#else}, @samp{#elif}, or
+@samp{#endif}, they are nested properly and match.  If they don't, you
+must make corrections manually.  It is a good idea to carefully check
+the resulting code anyway to make sure that it really does what you
+want it to; depending on how the input files were produced, the output
+might contain duplicate or otherwise incorrect code.
+
+The @code{patch} @samp{-D @var{name}} option behaves just like
+the @code{diff} @samp{-D @var{name}} option, except it operates on
+a file and a diff to produce a merged file; @xref{patch Options}.
+
+@node Example If-then-else, , Detailed If-then-else, If-then-else
+@subsection An Example of If-then-else Format
+
+Here is the output of @samp{diff -DTWO lao tzu} (@pxref{Sample
+diff Input}, for the complete contents of the two files):
+
+@example
+#ifndef TWO
+The Way that can be told of is not the eternal Way;
+The name that can be named is not the eternal name.
+#endif /* not TWO */
+The Nameless is the origin of Heaven and Earth;
+#ifndef TWO
+The Named is the mother of all things.
+#else /* TWO */
+The named is the mother of all things.
+
+#endif /* TWO */
+Therefore let there always be non-being,
+  so we may see their subtlety,
+And let there always be being,
+  so we may see their outcome.
+The two are the same,
+But after they are produced,
+  they have different names.
+#ifdef TWO
+They both may be called deep and profound.
+Deeper and more profound,
+The door of all subtleties!
+#endif /* TWO */
+@end example
+
+@node Comparing Directories, Adjusting Output, Output Formats, Top
+@chapter Comparing Directories
+
+You can use @code{diff} to compare some or all of the files in two
+directory trees.  When both file name arguments to @code{diff} are
+directories, it compares each file that is contained in both
+directories, examining file names in alphabetical order.  Normally
+@code{diff} is silent about pairs of files that contain no differences,
+but if you use the @samp{-s} or @samp{--report-identical-files} option,
+it reports pairs of identical files.  Normally @code{diff} reports
+subdirectories common to both directories without comparing
+subdirectories' files, but if you use the @samp{-r} or
+@samp{--recursive} option, it compares every corresponding pair of files
+in the directory trees, as many levels deep as they go.
+
+For file names that are in only one of the directories, @code{diff}
+normally does not show the contents of the file that exists; it reports
+only that the file exists in that directory and not in the other.  You
+can make @code{diff} act as though the file existed but was empty in the
+other directory, so that it outputs the entire contents of the file that
+actually exists.  (It is output as either an insertion or a
+deletion, depending on whether it is in the first or the second
+directory given.)  To do this, use the @samp{-N} or @samp{--new-file}
+option.
+
+If the older directory contains one or more large files that are not in
+the newer directory, you can make the patch smaller by using the
+@samp{-P} or @samp{--unidirectional-new-file} option instead of @samp{-N}.
+This option is like @samp{-N} except that it only inserts the contents
+of files that appear in the second directory but not the first (that is,
+files that were added).  At the top of the patch, write instructions for
+the user applying the patch to remove the files that were deleted before
+applying the patch.  @xref{Making Patches}, for more discussion of
+making patches for distribution.
+
+To ignore some files while comparing directories, use the @samp{-x
+@var{pattern}} or @samp{--exclude=@var{pattern}} option.  This option
+ignores any files or subdirectories whose base names match the shell
+pattern @var{pattern}.  Unlike in the shell, a period at the start of
+the base of a file name matches a wildcard at the start of a pattern.
+You should enclose @var{pattern} in quotes so that the shell does not
+expand it.  For example, the option @samp{-x '*.[ao]'} ignores any file
+whose name ends with @samp{.a} or @samp{.o}.
+
+This option accumulates if you specify it more than once.  For example,
+using the options @samp{-x 'RCS' -x '*,v'} ignores any file or
+subdirectory whose base name is @samp{RCS} or ends with @samp{,v}.
+
+If you need to give this option many times, you can instead put the
+patterns in a file, one pattern per line, and use the @samp{-X
+@var{file}} or @samp{--exclude-from=@var{file}} option.
+
+If you have been comparing two directories and stopped partway through,
+later you might want to continue where you left off.  You can do this by
+using the @samp{-S @var{file}} or @samp{--starting-file=@var{file}}
+option.  This compares only the file @var{file} and all alphabetically
+later files in the topmost directory level.
+
+@node Adjusting Output, diff Performance, Comparing Directories, Top
+@chapter Making @code{diff} Output Prettier
+
+@code{diff} provides several ways to adjust the appearance of its output.
+These adjustments can be applied to any output format.
+
+@menu
+* Tabs::		Preserving the alignment of tabstops.
+* Pagination::		Page numbering and timestamping @code{diff} output.
+@end menu
+
+@node Tabs, Pagination, , Adjusting Output
+@section Preserving Tabstop Alignment
+@cindex tabstop alignment
+@cindex aligning tabstops
+
+The lines of text in some of the @code{diff} output formats are preceded
+by one or two characters that indicate whether the text is inserted,
+deleted, or changed.  The addition of those characters can cause tabs to
+move to the next tabstop, throwing off the alignment of columns in the
+line.  GNU @code{diff} provides two ways to make tab-aligned columns
+line up correctly.
+
+The first way is to have @code{diff} convert all tabs into the correct
+number of spaces before outputting them; select this method with the
+@samp{-t} or @samp{--expand-tabs} option.  @code{diff} assumes that
+tabstops are set every 8 columns.  To use this form of output with
+@code{patch}, you must give @code{patch} the @samp{-l} or
+@samp{--ignore-white-space} option (@pxref{Changed White Space}, for more
+information).
+
+The other method for making tabs line up correctly is to add a tab
+character instead of a space after the indicator character at the
+beginning of the line.  This ensures that all following tab characters
+are in the same position relative to tabstops that they were in the
+original files, so that the output is aligned correctly.  Its
+disadvantage is that it can make long lines too long to fit on one line
+of the screen or the paper.  It also does not work with the unified
+output format, which does not have a space character after the change
+type indicator character.  Select this method with the @samp{-T} or
+@samp{--initial-tab} option.
+
+@node Pagination, , Tabs, Adjusting Output
+@section Paginating @code{diff} Output
+@cindex paginating @code{diff} output
+
+It can be convenient to have long output page-numbered and time-stamped.
+The @samp{-l} and @samp{--paginate} options do this by sending the
+@code{diff} output through the @code{pr} program.  Here is what the page
+header might look like for @samp{diff -lc lao tzu}:
+
+@example
+Mar 11 13:37 1991  diff -lc lao tzu Page 1
+@end example
+
+@node diff Performance, Comparing Three Files, Adjusting Output, Top
+@chapter @code{diff} Performance Tradeoffs
+@cindex performance of @code{diff}
+
+GNU @code{diff} runs quite efficiently; however, in some circumstances
+you can cause it to run faster or produce a more compact set of changes.
+There are two ways that you can affect the performance of GNU
+@code{diff} by changing the way it compares files.
+
+Performance has more than one dimension.  These options improve one
+aspect of performance at the cost of another, or they improve
+performance in some cases while hurting it in others.
+
+The way that GNU @code{diff} determines which lines have changed always
+comes up with a near-minimal set of differences.  Usually it is good
+enough for practical purposes.  If the @code{diff} output is large, you
+might want @code{diff} to use a modified algorithm that sometimes
+produces a smaller set of differences.  The @samp{-d} or
+@samp{--minimal} option does this; however, it can also cause
+@code{diff} to run more slowly than usual, so it is not the default
+behavior.
+
+When the files you are comparing are large and have small groups of
+changes scattered throughout them, you can use the @samp{-H} or
+@samp{--speed-large-files} option to make a different modification to
+the algorithm that @code{diff} uses.  If the input files have a constant
+small density of changes, this option speeds up the comparisons without
+changing the output.  If not, @code{diff} might produce a larger set of
+differences; however, the output will still be correct.
+
+Normally @code{diff} discards the prefix and suffix that is common to
+both files before it attempts to find a minimal set of differences.
+This makes @code{diff} run faster, but occasionally it may produce
+non-minimal output.  The @samp{--horizon-lines=@var{lines}} option
+prevents @code{diff} from discarding the last @var{lines} lines of the
+prefix and the first @var{lines} lines of the suffix.  This gives
+@code{diff} further opportunities to find a minimal output.
+
+@node Comparing Three Files, diff3 Merging, diff Performance, Top
+@chapter Comparing Three Files
+@cindex comparing three files
+@cindex format of @code{diff3} output
+
+Use the program @code{diff3} to compare three files and show any
+differences among them.  (@code{diff3} can also merge files; see
+@ref{diff3 Merging}).
+
+The ``normal'' @code{diff3} output format shows each hunk of
+differences without surrounding context.  Hunks are labeled depending
+on whether they are two-way or three-way, and lines are annotated by
+their location in the input files.
+
+@xref{Invoking diff3}, for more information on how to run @code{diff3}.
+
+@menu
+* Sample diff3 Input::		Sample @code{diff3} input for examples.
+* Detailed diff3 Normal::	A detailed description of normal output format.
+* diff3 Hunks::			The format of normal output format.
+* Example diff3 Normal::	Sample output in the normal format.
+@end menu
+
+@node Sample diff3 Input, Detailed diff3 Normal, , Comparing Three Files
+@section A Third Sample Input File
+@cindex @code{diff3} sample input
+@cindex sample input for @code{diff3}
+
+Here is a third sample file that will be used in examples to illustrate
+the output of @code{diff3} and how various options can change it.  The
+first two files are the same that we used for @code{diff} (@pxref{Sample
+diff Input}).  This is the third sample file, called @file{tao}:
+
+@example
+The Way that can be told of is not the eternal Way;
+The name that can be named is not the eternal name.
+The Nameless is the origin of Heaven and Earth;
+The named is the mother of all things.
+
+Therefore let there always be non-being,
+  so we may see their subtlety,
+And let there always be being,
+  so we may see their result.
+The two are the same,
+But after they are produced,
+  they have different names.
+
+  -- The Way of Lao-Tzu, tr. Wing-tsit Chan
+@end example
+
+@node Detailed diff3 Normal, diff3 Hunks, Sample diff3 Input, Comparing Three Files
+@section Detailed Description of @code{diff3} Normal Format
+
+Each hunk begins with a line marked @samp{====}.  Three-way hunks have
+plain @samp{====} lines, and two-way hunks have @samp{1}, @samp{2}, or
+@samp{3} appended to specify which of the three input files differ in
+that hunk.  The hunks contain copies of two or three sets of input
+lines each preceded by one or two commands identifying where the lines
+came from.
+
+Normally, two spaces precede each copy of an input line to distinguish
+it from the commands.  But with the @samp{-T} or @samp{--initial-tab}
+option, @code{diff3} uses a tab instead of two spaces; this lines up
+tabs correctly.  @xref{Tabs}, for more information.
+
+Commands take the following forms:
+
+@table @samp
+@item @var{file}:@var{l}a
+This hunk appears after line @var{l} of file @var{file}, and
+contains no lines in that file.  To edit this file to yield the other
+files, one must append hunk lines taken from the other files.  For
+example, @samp{1:11a} means that the hunk follows line 11 in the first
+file and contains no lines from that file.
+
+@item @var{file}:@var{r}c
+This hunk contains the lines in the range @var{r} of file @var{file}.
+The range @var{r} is a comma-separated pair of line numbers, or just one
+number if the range is a singleton.  To edit this file to yield the
+other files, one must change the specified lines to be the lines taken
+from the other files.  For example, @samp{2:11,13c} means that the hunk
+contains lines 11 through 13 from the second file.
+@end table
+
+If the last line in a set of input lines is incomplete
+(@pxref{Incomplete Lines}), it is distinguished on output from a full
+line by a following line that starts with @samp{\}.
+
+@node diff3 Hunks, Example diff3 Normal, Detailed diff3 Normal, Comparing Three Files
+@section @code{diff3} Hunks
+@cindex hunks for @code{diff3}
+@cindex @code{diff3} hunks
+
+Groups of lines that differ in two or three of the input files are
+called @dfn{diff3 hunks}, by analogy with @code{diff} hunks
+(@pxref{Hunks}).  If all three input files differ in a @code{diff3}
+hunk, the hunk is called a @dfn{three-way hunk}; if just two input files
+differ, it is a @dfn{two-way hunk}.
+
+As with @code{diff}, several solutions are possible.  When comparing the
+files @samp{A}, @samp{B}, and @samp{C}, @code{diff3} normally finds
+@code{diff3} hunks by merging the two-way hunks output by the two
+commands @samp{diff A B} and @samp{diff A C}.  This does not necessarily
+minimize the size of the output, but exceptions should be rare.
+
+For example, suppose @file{F} contains the three lines @samp{a},
+@samp{b}, @samp{f}, @file{G} contains the lines @samp{g}, @samp{b},
+@samp{g}, and @file{H} contains the lines @samp{a}, @samp{b},
+@samp{h}.  @samp{diff3 F G H} might output the following:
+
+@example
+====2
+1:1c
+3:1c
+  a
+2:1c
+  g
+====
+1:3c
+  f
+2:3c
+  g
+3:3c
+  h
+@end example
+
+@noindent
+because it found a two-way hunk containing @samp{a} in the first and
+third files and @samp{g} in the second file, then the single line
+@samp{b} common to all three files, then a three-way hunk containing
+the last line of each file.
+
+@node Example diff3 Normal, , diff3 Hunks, Comparing Three Files
+@section An Example of @code{diff3} Normal Format
+
+Here is the output of the command @samp{diff3 lao tzu tao}
+(@pxref{Sample diff3 Input}, for the complete contents of the files).
+Notice that it shows only the lines that are different among the three
+files.
+
+@example
+====2
+1:1,2c
+3:1,2c
+  The Way that can be told of is not the eternal Way;
+  The name that can be named is not the eternal name.
+2:0a
+====1
+1:4c
+  The Named is the mother of all things.
+2:2,3c
+3:4,5c
+  The named is the mother of all things.
+  
+====3
+1:8c
+2:7c
+    so we may see their outcome.
+3:9c
+    so we may see their result.
+====
+1:11a
+2:11,13c
+  They both may be called deep and profound.
+  Deeper and more profound,
+  The door of all subtleties!
+3:13,14c
+  
+    -- The Way of Lao-Tzu, tr. Wing-tsit Chan
+@end example
+
+@node diff3 Merging, Interactive Merging, Comparing Three Files, Top
+@chapter Merging From a Common Ancestor
+@cindex merging from a common ancestor
+
+When two people have made changes to copies of the same file,
+@code{diff3} can produce a merged output that contains both sets of
+changes together with warnings about conflicts.
+
+One might imagine programs with names like @code{diff4} and @code{diff5}
+to compare more than three files simultaneously, but in practice the
+need rarely arises.  You can use @code{diff3} to merge three or more
+sets of changes to a file by merging two change sets at a time.
+
+@code{diff3} can incorporate changes from two modified versions into a
+common preceding version.  This lets you merge the sets of changes
+represented by the two newer files.  Specify the common ancestor version
+as the second argument and the two newer versions as the first and third
+arguments, like this:
+
+@example
+diff3 @var{mine} @var{older} @var{yours}
+@end example
+
+@noindent
+You can remember the order of the arguments by noting that they are in
+alphabetical order.
+
+@cindex conflict
+@cindex overlap
+You can think of this as subtracting @var{older} from @var{yours} and
+adding the result to @var{mine}, or as merging into @var{mine} the
+changes that would turn @var{older} into @var{yours}.  This merging is
+well-defined as long as @var{mine} and @var{older} match in the
+neighborhood of each such change.  This fails to be true when all three
+input files differ or when only @var{older} differs; we call this
+a @dfn{conflict}.  When all three input files differ, we call the
+conflict an @dfn{overlap}.
+
+@code{diff3} gives you several ways to handle overlaps and conflicts.
+You can omit overlaps or conflicts, or select only overlaps,
+or mark conflicts with special @samp{<<<<<<<} and @samp{>>>>>>>} lines.
+
+@code{diff3} can output the merge results as an @code{ed} script that
+can be applied to the first file to yield the merged output.
+However, it is usually better to have @code{diff3} generate the merged
+output directly; this bypasses some problems with @code{ed}.
+
+@menu
+* Which Changes::		Selecting changes to incorporate.
+* Marking Conflicts::		Marking conflicts.
+* Bypassing ed::		Generating merged output directly.
+* Merging Incomplete Lines::	How @code{diff3} merges incomplete lines.
+* Saving the Changed File::	Emulating System V behavior.
+@end menu
+
+@node Which Changes, Marking Conflicts, , diff3 Merging
+@section Selecting Which Changes to Incorporate
+@cindex overlapping change, selection of
+@cindex unmerged change
+
+You can select all unmerged changes from @var{older} to @var{yours} for merging
+into @var{mine} with the @samp{-e} or @samp{--ed} option.  You can
+select only the nonoverlapping unmerged changes with @samp{-3} or
+@samp{--easy-only}, and you can select only the overlapping changes with
+@samp{-x} or @samp{--overlap-only}.
+
+The @samp{-e}, @samp{-3} and @samp{-x} options select only
+@dfn{unmerged changes}, i.e.@: changes where @var{mine} and @var{yours}
+differ; they ignore changes from @var{older} to @var{yours} where
+@var{mine} and @var{yours} are identical, because they assume that such
+changes have already been merged.  If this assumption is not a safe
+one, you can use the @samp{-A} or @samp{--show-all} option
+(@pxref{Marking Conflicts}).
+
+Here is the output of the command @code{diff3} with each of these three
+options (@pxref{Sample diff3 Input}, for the complete contents of the files).
+Notice that @samp{-e} outputs the union of the disjoint sets of changes
+output by @samp{-3} and @samp{-x}.
+
+Output of @samp{diff3 -e lao tzu tao}:
+@example
+11a
+
+  -- The Way of Lao-Tzu, tr. Wing-tsit Chan
+.
+8c
+  so we may see their result.
+.
+@end example
+
+Output of @samp{diff3 -3 lao tzu tao}:
+@example
+8c
+  so we may see their result.
+.
+@end example
+
+Output of @samp{diff3 -x lao tzu tao}:
+@example
+11a
+
+  -- The Way of Lao-Tzu, tr. Wing-tsit Chan
+.
+@end example
+
+@node Marking Conflicts, Bypassing ed, Which Changes, diff3 Merging
+@section Marking Conflicts
+@cindex conflict marking
+@cindex @samp{<<<<<<<} for marking conflicts
+
+@code{diff3} can mark conflicts in the merged output by
+bracketing them with special marker lines.  A conflict
+that comes from two files @var{A} and @var{B} is marked as follows:
+
+@example
+<<<<<<< @var{A}
+@r{lines from @var{A}}
+=======
+@r{lines from @var{B}}
+>>>>>>> @var{B}
+@end example
+
+A conflict that comes from three files @var{A}, @var{B} and @var{C} is
+marked as follows:
+
+@example
+<<<<<<< @var{A}
+@r{lines from @var{A}}
+||||||| @var{B}
+@r{lines from @var{B}}
+=======
+@r{lines from @var{C}}
+>>>>>>> @var{C}
+@end example
+
+The @samp{-A} or @samp{--show-all} option acts like the @samp{-e}
+option, except that it brackets conflicts, and it outputs all changes
+from @var{older} to @var{yours}, not just the unmerged changes.  Thus,
+given the sample input files (@pxref{Sample diff3 Input}), @samp{diff3
+-A lao tzu tao} puts brackets around the conflict where only @file{tzu}
+differs:
+
+@example
+<<<<<<< tzu
+=======
+The Way that can be told of is not the eternal Way;
+The name that can be named is not the eternal name.
+>>>>>>> tao
+@end example
+
+And it outputs the three-way conflict as follows:
+
+@example
+<<<<<<< lao
+||||||| tzu
+They both may be called deep and profound.
+Deeper and more profound,
+The door of all subtleties!
+=======
+
+  -- The Way of Lao-Tzu, tr. Wing-tsit Chan
+>>>>>>> tao
+@end example
+
+The @samp{-E} or @samp{--show-overlap} option outputs less information
+than the @samp{-A} or @samp{--show-all} option, because it outputs only
+unmerged changes, and it never outputs the contents of the second
+file.  Thus the @samp{-E} option acts like the @samp{-e} option,
+except that it brackets the first and third files from three-way
+overlapping changes.  Similarly, @samp{-X} acts like @samp{-x}, except
+it brackets all its (necessarily overlapping) changes.  For example,
+for the three-way overlapping change above, the @samp{-E} and @samp{-X}
+options output the following:
+
+@example
+<<<<<<< lao
+=======
+
+  -- The Way of Lao-Tzu, tr. Wing-tsit Chan
+>>>>>>> tao
+@end example
+
+If you are comparing files that have meaningless or uninformative names,
+you can use the @samp{-L @var{label}} or @samp{--label=@var{label}}
+option to show alternate names in the @samp{<<<<<<<}, @samp{|||||||}
+and @samp{>>>>>>>} brackets.  This option can be given up to three
+times, once for each input file.  Thus @samp{diff3 -A -L X -L Y -L Z A
+B C} acts like @samp{diff3 -A A B C}, except that the output looks like
+it came from files named @samp{X}, @samp{Y} and @samp{Z} rather than
+from files named @samp{A}, @samp{B} and @samp{C}.
+
+@node Bypassing ed, Merging Incomplete Lines, Marking Conflicts, diff3 Merging
+@section Generating the Merged Output Directly
+@cindex merged @code{diff3} format
+
+With the @samp{-m} or @samp{--merge} option, @code{diff3} outputs the
+merged file directly.  This is more efficient than using @code{ed} to
+generate it, and works even with non-text files that @code{ed} would
+reject.  If you specify @samp{-m} without an @code{ed} script option,
+@samp{-A} (@samp{--show-all}) is assumed.
+
+For example, the command @samp{diff3 -m lao tzu tao}
+(@pxref{Sample diff3 Input}, for a copy of the input files) would output
+the following:
+
+@example
+<<<<<<< tzu
+=======
+The Way that can be told of is not the eternal Way;
+The name that can be named is not the eternal name.
+>>>>>>> tao
+The Nameless is the origin of Heaven and Earth;
+The Named is the mother of all things.
+Therefore let there always be non-being,
+  so we may see their subtlety,
+And let there always be being,
+  so we may see their result.
+The two are the same,
+But after they are produced,
+  they have different names.
+<<<<<<< lao
+||||||| tzu
+They both may be called deep and profound.
+Deeper and more profound,
+The door of all subtleties!
+=======
+
+  -- The Way of Lao-Tzu, tr. Wing-tsit Chan
+>>>>>>> tao
+@end example
+
+@node Merging Incomplete Lines, Saving the Changed File, Bypassing ed, diff3 Merging
+@section How @code{diff3} Merges Incomplete Lines
+@cindex incomplete line merging
+
+With @samp{-m}, incomplete lines (@pxref{Incomplete Lines}) are simply
+copied to the output as they are found; if the merged output ends in a
+conflict and one of the input files ends in an incomplete
+line, succeeding @samp{|||||||}, @samp{=======} or @samp{>>>>>>>}
+brackets appear somewhere other than the start of a line because
+they are appended to the incomplete line.
+
+Without @samp{-m}, if an @code{ed} script option is specified and an
+incomplete line is found, @code{diff3} generates a warning and acts as
+if a newline had been present.
+
+@node Saving the Changed File, , Merging Incomplete Lines, diff3 Merging
+@section Saving the Changed File
+@cindex System V @code{diff3} compatibility
+
+Traditional Unix @code{diff3} generates an @code{ed} script without the
+trailing @samp{w} and @samp{q} commands that save the changes.
+System V @code{diff3} generates these extra commands.  GNU @code{diff3}
+normally behaves like traditional Unix @code{diff3}, but with the
+@samp{-i} option it behaves like System V @code{diff3} and appends the
+@samp{w} and @samp{q} commands.
+
+The @samp{-i} option requires one of the @code{ed} script options
+@samp{-AeExX3}, and is incompatible with the merged output option
+@samp{-m}.
+
+@node Interactive Merging, Merging with patch, diff3 Merging, Top
+@chapter Interactive Merging with @code{sdiff}
+@cindex diff merging
+@cindex interactive merging
+
+With @code{sdiff}, you can merge two files interactively based on a
+side-by-side @samp{-y} format comparison (@pxref{Side by Side}).  Use
+@samp{-o @var{file}} or @samp{--output=@var{file}} to specify where to
+put the merged text.  @xref{Invoking sdiff}, for more details on the
+options to @code{sdiff}.
+
+Another way to merge files interactively is to use the Emacs Lisp
+package @code{emerge}.  @xref{emerge, , emerge, emacs, The GNU Emacs
+Manual}, for more information.
+
+@menu
+* sdiff Option Summary::	Summary of @code{sdiff} options.
+* Merge Commands::	Merging two files interactively.
+@end menu
+
+@node sdiff Option Summary, Merge Commands, , Interactive Merging
+@section Specifying @code{diff} Options to @code{sdiff}
+@cindex @code{sdiff} output format
+
+The following @code{sdiff} options have the same meaning as for
+@code{diff}.  @xref{diff Options}, for the use of these options.
+
+@example
+-a -b -d -i -t -v
+-B -H -I @var{regexp}
+
+--ignore-blank-lines  --ignore-case
+--ignore-matching-lines=@var{regexp}  --ignore-space-change
+--left-column  --minimal  --speed-large-files
+--suppress-common-lines  --expand-tabs
+--text  --version  --width=@var{columns}
+@end example
+
+For historical reasons, @code{sdiff} has alternate names for some
+options.  The @samp{-l} option is equivalent to the @samp{--left-column}
+option, and similarly @samp{-s} is equivalent to
+@samp{--suppress-common-lines}.  The meaning of the @code{sdiff}
+@samp{-w} and @samp{-W} options is interchanged from that of
+@code{diff}: with @code{sdiff}, @samp{-w @var{columns}} is equivalent to
+@samp{--width=@var{columns}}, and @samp{-W} is equivalent to
+@samp{--ignore-all-space}.  @code{sdiff} without the @samp{-o} option is
+equivalent to @code{diff} with the @samp{-y} or @samp{--side-by-side}
+option (@pxref{Side by Side}).
+
+@node Merge Commands, , sdiff Option Summary, Interactive Merging
+@section Merge Commands
+@cindex merge commands
+@cindex merging interactively
+
+Groups of common lines, with a blank gutter, are copied from the first
+file to the output.  After each group of differing lines, @code{sdiff}
+prompts with @samp{%} and pauses, waiting for one of the following
+commands.  Follow each command with @key{RET}.
+
+@table @samp
+@item e
+Discard both versions.
+Invoke a text editor on an empty temporary file,
+then copy the resulting file to the output.
+
+@item eb
+Concatenate the two versions, edit the result in a temporary file,
+then copy the edited result to the output.
+
+@item el
+Edit a copy of the left version, then copy the result to the output.
+
+@item er
+Edit a copy of the right version, then copy the result to the output.
+
+@item l
+Copy the left version to the output.
+
+@item q
+Quit.
+
+@item r
+Copy the right version to the output.
+
+@item s
+Silently copy common lines.
+
+@item v
+Verbosely copy common lines.  This is the default.
+@end table
+
+The text editor invoked is specified by the @code{EDITOR} environment
+variable if it is set.  The default is system-dependent.
+
+@node Merging with patch, Making Patches, Interactive Merging, Top
+@chapter Merging with @code{patch}
+
+@code{patch} takes comparison output produced by @code{diff} and applies
+the differences to a copy of the original file, producing a patched
+version.  With @code{patch}, you can distribute just the changes to a
+set of files instead of distributing the entire file set; your
+correspondents can apply @code{patch} to update their copy of the files
+with your changes.  @code{patch} automatically determines the diff
+format, skips any leading or trailing headers, and uses the headers to
+determine which file to patch.  This lets your correspondents feed an
+article or message containing a difference listing directly to
+@code{patch}.
+
+@code{patch} detects and warns about common problems like forward
+patches.  It saves the original version of the files it patches, and
+saves any patches that it could not apply.  It can also maintain a
+@code{patchlevel.h} file to ensure that your correspondents apply
+diffs in the proper order.
+
+@code{patch} accepts a series of diffs in its standard input, usually
+separated by headers that specify which file to patch.  It applies
+@code{diff} hunks (@pxref{Hunks}) one by one.  If a hunk does not
+exactly match the original file, @code{patch} uses heuristics to try to
+patch the file as well as it can.  If no approximate match can be found,
+@code{patch} rejects the hunk and skips to the next hunk.  @code{patch}
+normally replaces each file @var{f} with its new version, saving the
+original file in @samp{@var{f}.orig}, and putting reject hunks (if any)
+into @samp{@var{f}.rej}.
+
+@xref{Invoking patch}, for detailed information on the options to
+@code{patch}.  @xref{Backups}, for more information on how
+@code{patch} names backup files.  @xref{Rejects}, for more information
+on where @code{patch} puts reject hunks.
+
+@menu
+* patch Input::		Selecting the type of @code{patch} input.
+* Imperfect::		Dealing with imperfect patches.
+* Empty Files::		Removing empty files after patching.
+* Multiple Patches::	Handling multiple patches in a file specially.
+* patch Messages::	Messages and questions @code{patch} can produce.
+@end menu
+
+@node patch Input, Imperfect, , Merging with patch
+@section Selecting the @code{patch} Input Format
+@cindex @code{patch} input format
+
+@code{patch} normally determines which @code{diff} format the patch
+file uses by examining its contents.  For patch files that contain
+particularly confusing leading text, you might need to use one of the
+following options to force @code{patch} to interpret the patch file as a
+certain format of diff.  The output formats listed here are the only
+ones that @code{patch} can understand.
+
+@table @samp
+@item -c
+@itemx --context
+context diff.
+
+@item -e
+@itemx --ed
+@code{ed} script.
+
+@item -n
+@itemx --normal
+normal diff.
+
+@item -u
+@itemx --unified
+unified diff.
+@end table
+
+@node Imperfect, Empty Files, patch Input, Merging with patch
+@section Applying Imperfect Patches
+@cindex imperfect patch application
+
+@code{patch} tries to skip any leading text in the patch file, apply the
+diff, and then skip any trailing text.  Thus you can feed a news article
+or mail message directly to @code{patch}, and it should work.  If the
+entire diff is indented by a constant amount of white space, @code{patch}
+automatically ignores the indentation.
+
+However, certain other types of imperfect input require user
+intervention.
+
+@menu
+* Changed White Space::	When tabs and spaces don't match exactly.
+* Reversed Patches::	Applying reversed patches correctly.
+* Inexact::		Helping @code{patch} find close matches.
+@end menu
+
+@node Changed White Space, Reversed Patches, , Imperfect
+@subsection Applying Patches with Changed White Space
+@cindex white space in patches
+
+Sometimes mailers, editors, or other programs change spaces into tabs,
+or vice versa.  If this happens to a patch file or an input file, the
+files might look the same, but @code{patch} will not be able to match
+them properly.  If this problem occurs, use the @samp{-l} or
+@samp{--ignore-white-space} option, which makes @code{patch} compare
+white space loosely so that any sequence of white space in the patch file
+matches any sequence of white space in the input files.  Non-white-space
+characters must still match exactly.  Each line of the context must
+still match a line in the input file.
+
+@node Reversed Patches, Inexact, Changed White Space, Imperfect
+@subsection Applying Reversed Patches
+@cindex reversed patches
+
+Sometimes people run @code{diff} with the new file first instead of
+second.  This creates a diff that is ``reversed''.  To apply such
+patches, give @code{patch} the @samp{-R} or @samp{--reverse} option.
+@code{patch} then attempts to swap each hunk around before applying it.
+Rejects come out in the swapped format.  The @samp{-R} option does not
+work with @code{ed} scripts because there is too little information in
+them to reconstruct the reverse operation.
+
+Often @code{patch} can guess that the patch is reversed.  If the first
+hunk of a patch fails, @code{patch} reverses the hunk to see if it can
+apply it that way.  If it can, @code{patch} asks you if you want to have
+the @samp{-R} option set; if it can't, @code{patch} continues to apply
+the patch normally.  This method cannot detect a reversed patch if it is
+a normal diff and the first command is an append (which should have been
+a delete) since appends always succeed, because a null context matches
+anywhere.  But most patches add or change lines rather than delete them,
+so most reversed normal diffs begin with a delete, which fails, and
+@code{patch} notices.
+
+If you apply a patch that you have already applied, @code{patch} thinks
+it is a reversed patch and offers to un-apply the patch.  This could be
+construed as a feature.  If you did this inadvertently and you don't
+want to un-apply the patch, just answer @samp{n} to this offer and to
+the subsequent ``apply anyway'' question---or type @kbd{C-c} to kill the
+@code{patch} process.
+
+@node Inexact, , Reversed Patches, Imperfect
+@subsection Helping @code{patch} Find Inexact Matches
+@cindex inexact patches
+@cindex fuzz factor when patching
+
+For context diffs, and to a lesser extent normal diffs, @code{patch} can
+detect when the line numbers mentioned in the patch are incorrect, and
+it attempts to find the correct place to apply each hunk of the patch.
+As a first guess, it takes the line number mentioned in the hunk, plus
+or minus any offset used in applying the previous hunk.  If that is not
+the correct place, @code{patch} scans both forward and backward for a
+set of lines matching the context given in the hunk.
+
+First @code{patch} looks for a place where all lines of the context
+match.  If it cannot find such a place, and it is reading a context or
+unified diff, and the maximum fuzz factor is set to 1 or more, then
+@code{patch} makes another scan, ignoring the first and last line of
+context.  If that fails, and the maximum fuzz factor is set to 2 or
+more, it makes another scan, ignoring the first two and last two lines
+of context.  It continues similarly if the maximum fuzz
+factor is larger.
+
+The @samp{-F @var{lines}} or @samp{--fuzz=@var{lines}} option sets the
+maximum fuzz factor to @var{lines}.  This option only applies to context
+and unified diffs; it ignores up to @var{lines} lines while looking for
+the place to install a hunk.  Note that a larger fuzz factor increases
+the odds of making a faulty patch.  The default fuzz factor is 2; it may
+not be set to more than the number of lines of context in the diff,
+ordinarily 3.
+
+If @code{patch} cannot find a place to install a hunk of the patch, it
+writes the hunk out to a reject file (@pxref{Rejects}, for information
+on how reject files are named).  It writes out rejected hunks in context
+format no matter what form the input patch is in.  If the input is a
+normal or @code{ed} diff, many of the contexts are simply null.  The
+line numbers on the hunks in the reject file may be different from those
+in the patch file: they show the approximate location where @code{patch}
+thinks the failed hunks belong in the new file rather than in the old
+one.
+
+As it completes each hunk, @code{patch} tells you whether the hunk
+succeeded or failed, and if it failed, on which line (in the new file)
+@code{patch} thinks the hunk should go.  If this is different from the
+line number specified in the diff, it tells you the offset.  A single
+large offset @emph{may} indicate that @code{patch} installed a hunk in
+the wrong place.  @code{patch} also tells you if it used a fuzz factor
+to make the match, in which case you should also be slightly suspicious.
+
+@code{patch} cannot tell if the line numbers are off in an @code{ed}
+script, and can only detect wrong line numbers in a normal diff when it
+finds a change or delete command.  It may have the same problem with a
+context diff using a fuzz factor equal to or greater than the number of
+lines of context shown in the diff (typically 3).  In these cases, you
+should probably look at a context diff between your original and patched
+input files to see if the changes make sense.  Compiling without errors
+is a pretty good indication that the patch worked, but not a guarantee.
+
+@code{patch} usually produces the correct results, even when it must
+make many guesses.  However, the results are guaranteed only when
+the patch is applied to an exact copy of the file that the patch was
+generated from.
+
+@node Empty Files, Multiple Patches, Imperfect, Merging with patch
+@section Removing Empty Files
+@cindex empty files, removing
+@cindex removing empty files
+
+Sometimes when comparing two directories, the first directory contains a
+file that the second directory does not.  If you give @code{diff} the
+@samp{-N} or @samp{--new-file} option, it outputs a diff that deletes
+the contents of this file.  By default, @code{patch} leaves an empty
+file after applying such a diff.  The @samp{-E} or
+@samp{--remove-empty-files} option to @code{patch} deletes output files
+that are empty after applying the diff.
+
+@node Multiple Patches, patch Messages, Empty Files, Merging with patch
+@section Multiple Patches in a File
+@cindex multiple patches
+
+If the patch file contains more than one patch, @code{patch} tries to
+apply each of them as if they came from separate patch files.  This
+means that it determines the name of the file to patch for each patch,
+and that it examines the leading text before each patch for file names
+and prerequisite revision level (@pxref{Making Patches}, for more on
+that topic).
+
+For the second and subsequent patches in the patch file, you can give
+options and another original file name by separating their argument
+lists with a @samp{+}.  However, the argument list for a second or
+subsequent patch may not specify a new patch file, since that does not
+make sense.
+
+For example, to tell @code{patch} to strip the first three slashes from
+the name of the first patch in the patch file and none from subsequent
+patches, and to use @file{code.c} as the first input file, you can use:
+
+@example
+patch -p3 code.c + -p0 < patchfile
+@end example
+
+The @samp{-S} or @samp{--skip} option ignores the current patch from the
+patch file, but continues looking for the next patch in the file.  Thus,
+to ignore the first and third patches in the patch file, you can use:
+
+@example
+patch -S + + -S + < patchfile
+@end example
+
+@node patch Messages, , Multiple Patches, Merging with patch
+@section Messages and Questions from @code{patch}
+@cindex @code{patch} messages and questions
+@cindex diagnostics from @code{patch}
+@cindex messages from @code{patch}
+
+@code{patch} can produce a variety of messages, especially if it has
+trouble decoding its input.  In a few situations where it's not sure how
+to proceed, @code{patch} normally prompts you for more information from
+the keyboard.  There are options to suppress printing non-fatal messages
+and stopping for keyboard input.
+
+The message @samp{Hmm...} indicates that @code{patch} is reading text in
+the patch file, attempting to determine whether there is a patch in that
+text, and if so, what kind of patch it is.
+
+You can inhibit all terminal output from @code{patch}, unless an error
+occurs, by using the @samp{-s}, @samp{--quiet}, or @samp{--silent}
+option.
+
+There are two ways you can prevent @code{patch} from asking you any
+questions.  The @samp{-f} or @samp{--force} option assumes that you know
+what you are doing.  It assumes the following:
+
+@itemize @bullet
+@item
+skip patches that do not contain file names in their headers;
+
+@item
+patch files even though they have the wrong version for the
+@samp{Prereq:} line in the patch;
+
+@item
+assume that patches are not reversed even if they look like they are.
+@end itemize
+
+The @samp{-t} or @samp{--batch} option is similar to @samp{-f}, in that
+it suppresses questions, but it makes somewhat different assumptions:
+
+@itemize @bullet
+@item
+skip patches that do not contain file names in their headers
+(the same as @samp{-f});
+
+@item
+skip patches for which the file has the wrong version for the
+@samp{Prereq:} line in the patch;
+
+@item
+assume that patches are reversed if they look like they are.
+@end itemize
+
+@code{patch} exits with a non-zero status if it creates any reject
+files.  When applying a set of patches in a loop, you should check the
+exit status, so you don't apply a later patch to a partially patched
+file.
+
+@node Making Patches, Invoking cmp, Merging with patch, Top
+@chapter Tips for Making Patch Distributions
+@cindex patch making tips
+@cindex tips for patch making
+
+Here are some things you should keep in mind if you are going to
+distribute patches for updating a software package.
+
+Make sure you have specified the file names correctly, either in a
+context diff header or with an @samp{Index:} line.  If you are patching
+files in a subdirectory, be sure to tell the patch user to specify a
+@samp{-p} or @samp{--strip} option as needed.  Take care to not send out
+reversed patches, since these make people wonder whether they have
+already applied the patch.
+
+To save people from partially applying a patch before other patches that
+should have gone before it, you can make the first patch in the patch
+file update a file with a name like @file{patchlevel.h} or
+@file{version.c}, which contains a patch level or version number.  If
+the input file contains the wrong version number, @code{patch} will
+complain immediately.
+
+An even clearer way to prevent this problem is to put a @samp{Prereq:}
+line before the patch.  If the leading text in the patch file contains a
+line that starts with @samp{Prereq:}, @code{patch} takes the next word
+from that line (normally a version number) and checks whether the next
+input file contains that word, preceded and followed by either
+white space or a newline.  If not, @code{patch} prompts you for
+confirmation before proceeding.  This makes it difficult to accidentally
+apply patches in the wrong order.
+
+Since @code{patch} does not handle incomplete lines properly, make sure
+that all the source files in your program end with a newline whenever
+you release a version.
+
+To create a patch that changes an older version of a package into a
+newer version, first make a copy of the older version in a scratch
+directory.  Typically you do that by unpacking a @code{tar} or
+@code{shar} archive of the older version.
+
+You might be able to reduce the size of the patch by renaming or
+removing some files before making the patch.  If the older version of
+the package contains any files that the newer version does not, or if
+any files have been renamed between the two versions, make a list of
+@code{rm} and @code{mv} commands for the user to execute in the old
+version directory before applying the patch.  Then run those commands
+yourself in the scratch directory.
+
+If there are any files that you don't need to include in the patch
+because they can easily be rebuilt from other files (for example,
+@file{TAGS} and output from @code{yacc} and @code{makeinfo}), replace
+the versions in the scratch directory with the newer versions, using
+@code{rm} and @code{ln} or @code{cp}.
+
+Now you can create the patch.  The de-facto standard @code{diff} format
+for patch distributions is context format with two lines of context,
+produced by giving @code{diff} the @samp{-C 2} option.  Do not use fewer
+than two lines of context, because @code{patch} typically needs at
+least two lines for proper operation.  Give @code{diff} the @samp{-P}
+option in case the newer version of the package contains any files that
+the older one does not.  Make sure to specify the scratch directory
+first and the newer directory second.
+
+Add to the top of the patch a note telling the user any @code{rm} and
+@code{mv} commands to run before applying the patch.  Then you can
+remove the scratch directory.
+
+@node Invoking cmp, Invoking diff, Making Patches, Top
+@chapter Invoking @code{cmp}
+@cindex invoking @code{cmp}
+@cindex @code{cmp} invocation
+
+The @code{cmp} command compares two files, and if they differ, tells the
+first byte and line number where they differ.  Its arguments are as
+follows:
+
+@example
+cmp @var{options}@dots{} @var{from-file} @r{[}@var{to-file}@r{]}
+@end example
+
+The file name @samp{-} is always the standard input.  @code{cmp} also
+uses the standard input if one file name is omitted.
+
+An exit status of 0 means no differences were found, 1 means some
+differences were found, and 2 means trouble.
+
+@menu
+* cmp Options::		Summary of options to @code{cmp}.
+@end menu
+
+@node cmp Options, , , Invoking cmp
+@section Options to @code{cmp}
+@cindex @code{cmp} options
+@cindex options for @code{cmp}
+
+Below is a summary of all of the options that GNU @code{cmp} accepts.
+Most options have two equivalent names, one of which is a single letter
+preceded by @samp{-}, and the other of which is a long name preceded by
+@samp{--}.  Multiple single letter options (unless they take an
+argument) can be combined into a single command line word: @samp{-cl} is
+equivalent to @samp{-c -l}.
+
+@table @samp
+@item -c
+Print the differing characters.  Display control characters as a
+@samp{^} followed by a letter of the alphabet and precede characters
+that have the high bit set with @samp{M-} (which stands for ``meta'').
+
+@item --ignore-initial=@var{bytes}
+Ignore any differences in the first @var{bytes} bytes of the input files.
+Treat files with fewer than @var{bytes} bytes as if they are empty.
+
+@item -l
+Print the (decimal) offsets and (octal) values of all differing bytes.
+
+@item --print-chars
+Print the differing characters.  Display control characters as a
+@samp{^} followed by a letter of the alphabet and precede characters
+that have the high bit set with @samp{M-} (which stands for ``meta'').
+
+@item --quiet
+@itemx -s
+@itemx --silent
+Do not print anything; only return an exit status indicating whether
+the files differ.
+
+@item --verbose
+Print the (decimal) offsets and (octal) values of all differing bytes.
+
+@item -v
+@itemx --version
+Output the version number of @code{cmp}.
+@end table
+
+@node Invoking diff, Invoking diff3, Invoking cmp, Top
+@chapter Invoking @code{diff}
+@cindex invoking @code{diff}
+@cindex @code{diff} invocation
+
+The format for running the @code{diff} command is:
+
+@example
+diff @var{options}@dots{} @var{from-file} @var{to-file}
+@end example
+
+In the simplest case, @code{diff} compares the contents of the two files
+@var{from-file} and @var{to-file}.  A file name of @samp{-} stands for
+text read from the standard input.  As a special case, @samp{diff - -}
+compares a copy of standard input to itself.
+
+If @var{from-file} is a directory and @var{to-file} is not, @code{diff}
+compares the file in @var{from-file} whose file name is that of @var{to-file},
+and vice versa.  The non-directory file must not be @samp{-}.
+
+If both @var{from-file} and @var{to-file} are directories,
+@code{diff} compares corresponding files in both directories, in
+alphabetical order; this comparison is not recursive unless the
+@samp{-r} or @samp{--recursive} option is given.  @code{diff} never
+compares the actual contents of a directory as if it were a file.  The
+file that is fully specified may not be standard input, because standard
+input is nameless and the notion of ``file with the same name'' does not
+apply.
+
+@code{diff} options begin with @samp{-}, so normally @var{from-file} and
+@var{to-file} may not begin with @samp{-}.  However, @samp{--} as an
+argument by itself treats the remaining arguments as file names even if
+they begin with @samp{-}.
+
+An exit status of 0 means no differences were found, 1 means some
+differences were found, and 2 means trouble.
+
+@menu
+* diff Options::	Summary of options to @code{diff}.
+@end menu
+
+@node diff Options, , , Invoking diff
+@section Options to @code{diff}
+@cindex @code{diff} options
+@cindex options for @code{diff}
+
+Below is a summary of all of the options that GNU @code{diff} accepts.
+Most options have two equivalent names, one of which is a single letter
+preceded by @samp{-}, and the other of which is a long name preceded by
+@samp{--}.  Multiple single letter options (unless they take an
+argument) can be combined into a single command line word: @samp{-ac} is
+equivalent to @samp{-a -c}.  Long named options can be abbreviated to
+any unique prefix of their name.  Brackets ([ and ]) indicate that an
+option takes an optional argument.
+
+@table @samp
+@item -@var{lines}
+Show @var{lines} (an integer) lines of context.  This option does not
+specify an output format by itself; it has no effect unless it is
+combined with @samp{-c} (@pxref{Context Format}) or @samp{-u}
+(@pxref{Unified Format}).  This option is obsolete.  For proper
+operation, @code{patch} typically needs at least two lines of context.
+
+@item -a
+Treat all files as text and compare them line-by-line, even if they
+do not seem to be text.  @xref{Binary}.
+
+@item -b
+Ignore changes in amount of white space.  @xref{White Space}.
+
+@item -B
+Ignore changes that just insert or delete blank lines.  @xref{Blank
+Lines}.
+
+@item --binary
+Read and write data in binary mode.  @xref{Binary}.
+
+@item --brief
+Report only whether the files differ, not the details of the
+differences.  @xref{Brief}.
+
+@item -c
+Use the context output format.  @xref{Context Format}.
+
+@item -C @var{lines}
+@itemx --context@r{[}=@var{lines}@r{]}
+Use the context output format, showing @var{lines} (an integer) lines of
+context, or three if @var{lines} is not given.  @xref{Context Format}.
+For proper operation, @code{patch} typically needs at least two lines of
+context.
+
+@item --changed-group-format=@var{format}
+Use @var{format} to output a line group containing differing lines from
+both files in if-then-else format.  @xref{Line Group Formats}.
+
+@item -d
+Change the algorithm to perhaps find a smaller set of changes.  This makes
+@code{diff} slower (sometimes much slower).  @xref{diff Performance}.
+
+@item -D @var{name}
+Make merged @samp{#ifdef} format output, conditional on the preprocessor
+macro @var{name}.  @xref{If-then-else}.
+
+@item -e
+@itemx --ed
+Make output that is a valid @code{ed} script.  @xref{ed Scripts}.
+
+@item --exclude=@var{pattern}
+When comparing directories, ignore files and subdirectories whose basenames
+match @var{pattern}.  @xref{Comparing Directories}.
+
+@item --exclude-from=@var{file}
+When comparing directories, ignore files and subdirectories whose basenames
+match any pattern contained in @var{file}.  @xref{Comparing Directories}.
+
+@item --expand-tabs
+Expand tabs to spaces in the output, to preserve the alignment of tabs
+in the input files.  @xref{Tabs}.
+
+@item -f
+Make output that looks vaguely like an @code{ed} script but has changes
+in the order they appear in the file.  @xref{Forward ed}.
+
+@item -F @var{regexp}
+In context and unified format, for each hunk of differences, show some
+of the last preceding line that matches @var{regexp}.  @xref{Specified
+Headings}.
+
+@item --forward-ed
+Make output that looks vaguely like an @code{ed} script but has changes
+in the order they appear in the file.  @xref{Forward ed}.
+
+@item -h
+This option currently has no effect; it is present for Unix
+compatibility.
+
+@item -H
+Use heuristics to speed handling of large files that have numerous
+scattered small changes.  @xref{diff Performance}.
+
+@item --horizon-lines=@var{lines}
+Do not discard the last @var{lines} lines of the common prefix
+and the first @var{lines} lines of the common suffix.
+@xref{diff Performance}.
+
+@item -i
+Ignore changes in case; consider upper- and lower-case letters
+equivalent.  @xref{Case Folding}.
+
+@item -I @var{regexp}
+Ignore changes that just insert or delete lines that match @var{regexp}.
+@xref{Specified Folding}.
+
+@item --ifdef=@var{name}
+Make merged if-then-else output using @var{name}.  @xref{If-then-else}.
+
+@item --ignore-all-space
+Ignore white space when comparing lines.  @xref{White Space}.
+
+@item --ignore-blank-lines
+Ignore changes that just insert or delete blank lines.  @xref{Blank
+Lines}.
+
+@item --ignore-case
+Ignore changes in case; consider upper- and lower-case to be the same.
+@xref{Case Folding}.
+
+@item --ignore-matching-lines=@var{regexp}
+Ignore changes that just insert or delete lines that match @var{regexp}.
+@xref{Specified Folding}.
+
+@item --ignore-space-change
+Ignore changes in amount of white space.
+@xref{White Space}.
+
+@item --initial-tab
+Output a tab rather than a space before the text of a line in normal or
+context format.  This causes the alignment of tabs in the line to look
+normal.  @xref{Tabs}.
+
+@item -l
+Pass the output through @code{pr} to paginate it.  @xref{Pagination}.
+
+@item -L @var{label}
+Use @var{label} instead of the file name in the context format
+(@pxref{Context Format}) and unified format (@pxref{Unified Format})
+headers.  @xref{RCS}.
+
+@item --label=@var{label}
+Use @var{label} instead of the file name in the context format
+(@pxref{Context Format}) and unified format (@pxref{Unified Format})
+headers.
+
+@item --left-column
+Print only the left column of two common lines in side by side format.
+@xref{Side by Side Format}.
+
+@item --line-format=@var{format}
+Use @var{format} to output all input lines in if-then-else format.
+@xref{Line Formats}.
+
+@item --minimal
+Change the algorithm to perhaps find a smaller set of changes.  This
+makes @code{diff} slower (sometimes much slower).  @xref{diff
+Performance}.
+
+@item -n
+Output RCS-format diffs; like @samp{-f} except that each command
+specifies the number of lines affected.  @xref{RCS}.
+
+@item -N
+@itemx --new-file
+In directory comparison, if a file is found in only one directory,
+treat it as present but empty in the other directory.  @xref{Comparing
+Directories}.
+
+@item --new-group-format=@var{format}
+Use @var{format} to output a group of lines taken from just the second
+file in if-then-else format.  @xref{Line Group Formats}.
+
+@item --new-line-format=@var{format}
+Use @var{format} to output a line taken from just the second file in
+if-then-else format.  @xref{Line Formats}.
+
+@item --old-group-format=@var{format}
+Use @var{format} to output a group of lines taken from just the first
+file in if-then-else format.  @xref{Line Group Formats}.
+
+@item --old-line-format=@var{format}
+Use @var{format} to output a line taken from just the first file in
+if-then-else format.  @xref{Line Formats}.
+
+@item -p
+Show which C function each change is in.  @xref{C Function Headings}.
+
+@item -P
+When comparing directories, if a file appears only in the second
+directory of the two, treat it as present but empty in the other.
+@xref{Comparing Directories}.
+
+@item --paginate
+Pass the output through @code{pr} to paginate it.  @xref{Pagination}.
+
+@item -q
+Report only whether the files differ, not the details of the
+differences.  @xref{Brief}.
+
+@item -r
+When comparing directories, recursively compare any subdirectories
+found.  @xref{Comparing Directories}.
+
+@item --rcs
+Output RCS-format diffs; like @samp{-f} except that each command
+specifies the number of lines affected.  @xref{RCS}.
+
+@item --recursive
+When comparing directories, recursively compare any subdirectories
+found.  @xref{Comparing Directories}.
+
+@item --report-identical-files
+Report when two files are the same.  @xref{Comparing Directories}.
+
+@item -s
+Report when two files are the same.  @xref{Comparing Directories}.
+
+@item -S @var{file}
+When comparing directories, start with the file @var{file}.  This is
+used for resuming an aborted comparison.  @xref{Comparing Directories}.
+
+@item --sdiff-merge-assist
+Print extra information to help @code{sdiff}.  @code{sdiff} uses this
+option when it runs @code{diff}.  This option is not intended for users
+to use directly.
+
+@item --show-c-function
+Show which C function each change is in.  @xref{C Function Headings}.
+
+@item --show-function-line=@var{regexp}
+In context and unified format, for each hunk of differences, show some
+of the last preceding line that matches @var{regexp}.  @xref{Specified
+Headings}.
+
+@item --side-by-side
+Use the side by side output format.  @xref{Side by Side Format}.
+
+@item --speed-large-files
+Use heuristics to speed handling of large files that have numerous
+scattered small changes.  @xref{diff Performance}.
+
+@item --starting-file=@var{file}
+When comparing directories, start with the file @var{file}.  This is
+used for resuming an aborted comparison.  @xref{Comparing Directories}.
+
+@item --suppress-common-lines
+Do not print common lines in side by side format.
+@xref{Side by Side Format}.
+
+@item -t
+Expand tabs to spaces in the output, to preserve the alignment of tabs
+in the input files.  @xref{Tabs}.
+
+@item -T
+Output a tab rather than a space before the text of a line in normal or
+context format.  This causes the alignment of tabs in the line to look
+normal.  @xref{Tabs}.
+
+@item --text
+Treat all files as text and compare them line-by-line, even if they
+do not appear to be text.  @xref{Binary}.
+
+@item -u
+Use the unified output format.  @xref{Unified Format}.
+
+@item --unchanged-group-format=@var{format}
+Use @var{format} to output a group of common lines taken from both files
+in if-then-else format.  @xref{Line Group Formats}.
+
+@item --unchanged-line-format=@var{format}
+Use @var{format} to output a line common to both files in if-then-else
+format.  @xref{Line Formats}.
+
+@item --unidirectional-new-file
+When comparing directories, if a file appears only in the second
+directory of the two, treat it as present but empty in the other.
+@xref{Comparing Directories}.
+
+@item -U @var{lines}
+@itemx --unified@r{[}=@var{lines}@r{]}
+Use the unified output format, showing @var{lines} (an integer) lines of
+context, or three if @var{lines} is not given.  @xref{Unified Format}.
+For proper operation, @code{patch} typically needs at least two lines of
+context.
+
+@item -v
+@itemx --version
+Output the version number of @code{diff}.
+
+@item -w
+Ignore white space when comparing lines.  @xref{White Space}.
+
+@item -W @var{columns}
+@itemx --width=@var{columns}
+Use an output width of @var{columns} in side by side format.
+@xref{Side by Side Format}.
+
+@item -x @var{pattern}
+When comparing directories, ignore files and subdirectories whose basenames
+match @var{pattern}.  @xref{Comparing Directories}.
+
+@item -X @var{file}
+When comparing directories, ignore files and subdirectories whose basenames
+match any pattern contained in @var{file}.  @xref{Comparing Directories}.
+
+@item -y
+Use the side by side output format.  @xref{Side by Side Format}.
+@end table
+
+@node Invoking diff3, Invoking patch, Invoking diff, Top
+@chapter Invoking @code{diff3}
+@cindex invoking @code{diff3}
+@cindex @code{diff3} invocation
+
+The @code{diff3} command compares three files and outputs descriptions
+of their differences.  Its arguments are as follows:
+
+@example
+diff3 @var{options}@dots{} @var{mine} @var{older} @var{yours}
+@end example
+
+The files to compare are @var{mine}, @var{older}, and @var{yours}.
+At most one of these three file names may be @samp{-},
+which tells @code{diff3} to read the standard input for that file.
+
+An exit status of 0 means @code{diff3} was successful, 1 means some
+conflicts were found, and 2 means trouble.
+
+@menu
+* diff3 Options::		Summary of options to @code{diff3}.
+@end menu
+
+@node diff3 Options, , , Invoking diff3
+@section Options to @code{diff3}
+@cindex @code{diff3} options
+@cindex options for @code{diff3}
+
+Below is a summary of all of the options that GNU @code{diff3}
+accepts.  Multiple single letter options (unless they take an argument)
+can be combined into a single command line argument.
+
+@table @samp
+@item -a
+Treat all files as text and compare them line-by-line, even if they
+do not appear to be text.  @xref{Binary}.
+
+@item -A
+Incorporate all changes from @var{older} to @var{yours} into @var{mine},
+surrounding all conflicts with bracket lines.
+@xref{Marking Conflicts}.
+
+@item -e
+Generate an @code{ed} script that incorporates all the changes from
+@var{older} to @var{yours} into @var{mine}.  @xref{Which Changes}.
+
+@item -E
+Like @samp{-e}, except bracket lines from overlapping changes' first
+and third files.
+@xref{Marking Conflicts}.
+With @samp{-E}, an overlapping change looks like this:
+
+@example
+<<<<<<< @var{mine}
+@r{lines from @var{mine}}
+=======
+@r{lines from @var{yours}}
+>>>>>>> @var{yours}
+@end example
+
+@item --ed
+Generate an @code{ed} script that incorporates all the changes from
+@var{older} to @var{yours} into @var{mine}.  @xref{Which Changes}.
+
+@item --easy-only
+Like @samp{-e}, except output only the nonoverlapping changes.
+@xref{Which Changes}.
+
+@item -i
+Generate @samp{w} and @samp{q} commands at the end of the @code{ed}
+script for System V compatibility.  This option must be combined with
+one of the @samp{-AeExX3} options, and may not be combined with @samp{-m}.
+@xref{Saving the Changed File}.
+
+@item --initial-tab
+Output a tab rather than two spaces before the text of a line in normal format.
+This causes the alignment of tabs in the line to look normal.  @xref{Tabs}.
+
+@item -L @var{label}
+@itemx --label=@var{label}
+Use the label @var{label} for the brackets output by the @samp{-A},
+@samp{-E} and @samp{-X} options.  This option may be given up to three
+times, one for each input file.  The default labels are the names of
+the input files.  Thus @samp{diff3 -L X -L Y -L Z -m A B C} acts like
+@samp{diff3 -m A B C}, except that the output looks like it came from
+files named @samp{X}, @samp{Y} and @samp{Z} rather than from files
+named @samp{A}, @samp{B} and @samp{C}.  @xref{Marking Conflicts}.
+
+@item -m
+@itemx --merge
+Apply the edit script to the first file and send the result to standard
+output.  Unlike piping the output from @code{diff3} to @code{ed}, this
+works even for binary files and incomplete lines.  @samp{-A} is assumed
+if no edit script option is specified.  @xref{Bypassing ed}.
+
+@item --overlap-only
+Like @samp{-e}, except output only the overlapping changes.
+@xref{Which Changes}.
+
+@item --show-all
+Incorporate all unmerged changes from @var{older} to @var{yours} into
+@var{mine}, surrounding all overlapping changes with bracket lines.
+@xref{Marking Conflicts}.
+
+@item --show-overlap
+Like @samp{-e}, except bracket lines from overlapping changes' first
+and third files.
+@xref{Marking Conflicts}.
+
+@item -T
+Output a tab rather than two spaces before the text of a line in normal format.
+This causes the alignment of tabs in the line to look normal.  @xref{Tabs}.
+
+@item --text
+Treat all files as text and compare them line-by-line, even if they
+do not appear to be text.  @xref{Binary}.
+
+@item -v
+@itemx --version
+Output the version number of @code{diff3}.
+
+@item -x
+Like @samp{-e}, except output only the overlapping changes.
+@xref{Which Changes}.
+
+@item -X
+Like @samp{-E}, except output only the overlapping changes.
+In other words, like @samp{-x}, except bracket changes as in @samp{-E}.
+@xref{Marking Conflicts}.
+
+@item -3
+Like @samp{-e}, except output only the nonoverlapping changes.
+@xref{Which Changes}.
+@end table
+
+@node Invoking patch, Invoking sdiff, Invoking diff3, Top
+@chapter Invoking @code{patch}
+@cindex invoking @code{patch}
+@cindex @code{patch} invocation
+
+Normally @code{patch} is invoked like this:
+
+@example
+patch <@var{patchfile}
+@end example
+
+The full format for invoking @code{patch} is:
+
+@example
+patch @var{options}@dots{} @r{[}@var{origfile} @r{[}@var{patchfile}@r{]}@r{]} @r{[}+ @var{options}@dots{} @r{[}@var{origfile}@r{]}@r{]}@dots{}
+@end example
+
+If you do not specify @var{patchfile}, or if @var{patchfile} is
+@samp{-}, @code{patch} reads the patch (that is, the @code{diff} output)
+from the standard input.
+
+You can specify one or more of the original files as @var{origfile} arguments;
+each one and options for interpreting it is separated from the others with a
+@samp{+}.  @xref{Multiple Patches}, for more information.
+
+If you do not specify an input file on the command line, @code{patch}
+tries to figure out from the @dfn{leading text} (any text in the patch
+that comes before the @code{diff} output) which file to edit.  In the
+header of a context or unified diff, @code{patch} looks in lines
+beginning with @samp{***}, @samp{---}, or @samp{+++}; among those, it
+chooses the shortest name of an existing file.  Otherwise, if there is
+an @samp{Index:} line in the leading text, @code{patch} tries to use the
+file name from that line.  If @code{patch} cannot figure out the name of
+an existing file from the leading text, it prompts you for the name of
+the file to patch.
+
+If the input file does not exist or is read-only, and a suitable RCS or
+SCCS file exists, @code{patch} attempts to check out or get the file
+before proceeding.
+
+By default, @code{patch} replaces the original input file with the
+patched version, after renaming the original file into a backup file
+(@pxref{Backups}, for a description of how @code{patch} names backup
+files).  You can also specify where to put the output with the @samp{-o
+@var{output-file}} or @samp{--output=@var{output-file}} option.
+
+@menu
+* patch Directories::	Changing directory and stripping directories.
+* Backups::		Backup file names.
+* Rejects::		Reject file names.
+* patch Options::	Summary table of options to @code{patch}.
+@end menu
+
+@node patch Directories, Backups, , Invoking patch
+@section Applying Patches in Other Directories
+@cindex directories and patch
+@cindex patching directories
+
+The @samp{-d @var{directory}} or @samp{--directory=@var{directory}}
+option to @code{patch} makes directory @var{directory} the current
+directory for interpreting both file names in the patch file, and file
+names given as arguments to other options (such as @samp{-B} and
+@samp{-o}).  For example, while in a news reading program, you can patch
+a file in the @file{/usr/src/emacs} directory directly from the article
+containing the patch like this:
+
+@example
+| patch -d /usr/src/emacs
+@end example
+
+Sometimes the file names given in a patch contain leading directories,
+but you keep your files in a directory different from the one given in
+the patch.  In those cases, you can use the
+@samp{-p@r{[}@var{number}@r{]}} or @samp{--strip@r{[}=@var{number}@r{]}}
+option to set the file name strip count to @var{number}.  The strip
+count tells @code{patch} how many slashes, along with the directory
+names between them, to strip from the front of file names.  @samp{-p}
+with no @var{number} given is equivalent to @samp{-p0}.  By default,
+@code{patch} strips off all leading directories, leaving just the base file
+names, except that when a file name given in the patch is a relative
+file name and all of its leading directories already exist, @code{patch} does
+not strip off the leading directory.  (A @dfn{relative} file name is one
+that does not start with a slash.)
+
+@code{patch} looks for each file (after any slashes have been stripped)
+in the current directory, or if you used the @samp{-d @var{directory}}
+option, in that directory.
+
+For example, suppose the file name in the patch file is
+@file{/gnu/src/emacs/etc/NEWS}.  Using @samp{-p} or @samp{-p0} gives the
+entire file name unmodified, @samp{-p1} gives
+@file{gnu/src/emacs/etc/NEWS} (no leading slash), @samp{-p4} gives
+@file{etc/NEWS}, and not specifying @samp{-p} at all gives @file{NEWS}.
+
+@node Backups, Rejects, patch Directories, Invoking patch
+@section Backup File Names
+@cindex backup file names
+
+Normally, @code{patch} renames an original input file into a backup file
+by appending to its name the extension @samp{.orig}, or @samp{~} on
+systems that do not support long file names.  The @samp{-b
+@var{backup-suffix}} or @samp{--suffix=@var{backup-suffix}} option uses
+@var{backup-suffix} as the backup extension instead.
+
+Alternately, you can specify the extension for backup files with the
+@code{SIMPLE_BACKUP_SUFFIX} environment variable, which the options
+override.
+
+@code{patch} can also create numbered backup files the way GNU Emacs
+does.  With this method, instead of having a single backup of each file,
+@code{patch} makes a new backup file name each time it patches a file.
+For example, the backups of a file named @file{sink} would be called,
+successively, @file{sink.~1~}, @file{sink.~2~}, @file{sink.~3~}, etc.
+
+The @samp{-V @var{backup-style}} or
+@samp{--version-control=@var{backup-style}} option takes as an argument
+a method for creating backup file names.  You can alternately control
+the type of backups that @code{patch} makes with the
+@code{VERSION_CONTROL} environment variable, which the @samp{-V} option
+overrides.  The value of the @code{VERSION_CONTROL} environment variable
+and the argument to the @samp{-V} option are like the GNU Emacs
+@code{version-control} variable (@pxref{Backups,
+emacs, The GNU Emacs Manual}, for more information on backup versions in
+Emacs).  They also recognize synonyms that are more descriptive.  The
+valid values are listed below; unique abbreviations are acceptable.
+
+@table @samp
+@item t
+@itemx numbered
+Always make numbered backups.
+
+@item nil
+@itemx existing
+Make numbered backups of files that already have them, simple backups of
+the others.  This is the default.
+
+@item never
+@itemx simple
+Always make simple backups.
+@end table
+
+Alternately, you can tell @code{patch} to prepend a prefix, such as a
+directory name, to produce backup file names.  The @samp{-B
+@var{backup-prefix}} or @samp{--prefix=@var{backup-prefix}} option makes
+backup files by prepending @var{backup-prefix} to them.  If you use this
+option, @code{patch} ignores any @samp{-b} option that you give.
+
+If the backup file already exists, @code{patch} creates a new backup
+file name by changing the first lowercase letter in the last component
+of the file name into uppercase.  If there are no more lowercase letters
+in the name, it removes the first character from the name.  It repeats
+this process until it comes up with a backup file name that does not
+already exist.
+
+If you specify the output file with the @samp{-o} option, that file is
+the one that is backed up, not the input file.
+
+@node Rejects, patch Options, Backups, Invoking patch
+@section Reject File Names
+@cindex reject file names
+
+The names for reject files (files containing patches that @code{patch}
+could not find a place to apply) are normally the name of the output
+file with @samp{.rej} appended (or @samp{#} on systems that do not
+support long file names).
+
+Alternatively, you can tell @code{patch} to place all of the rejected
+patches in a single file.  The @samp{-r @var{reject-file}} or
+@samp{--reject-file=@var{reject-file}} option uses @var{reject-file} as
+the reject file name.
+
+@node patch Options, , Rejects, Invoking patch
+@section Options to @code{patch}
+@cindex @code{patch} options
+@cindex options for @code{patch}
+
+Here is a summary of all of the options that @code{patch} accepts.
+Older versions of @code{patch} do not accept long-named options or the
+@samp{-t}, @samp{-E}, or @samp{-V} options.
+
+Multiple single-letter options that do not take an argument can be
+combined into a single command line argument (with only one dash).
+Brackets ([ and ]) indicate that an option takes an optional argument.
+
+@table @samp
+@item -b @var{backup-suffix}
+Use @var{backup-suffix} as the backup extension instead of
+@samp{.orig} or @samp{~}.  @xref{Backups}.
+
+@item -B @var{backup-prefix}
+Use @var{backup-prefix} as a prefix to the backup file name.  If this
+option is specified, any @samp{-b} option is ignored.  @xref{Backups}.
+
+@item --batch
+Do not ask any questions.  @xref{patch Messages}.
+
+@item -c
+@itemx --context
+Interpret the patch file as a context diff.  @xref{patch Input}.
+
+@item -d @var{directory}
+@itemx --directory=@var{directory}
+Makes directory @var{directory} the current directory for interpreting
+both file names in the patch file, and file names given as arguments to
+other options.  @xref{patch Directories}.
+
+@item -D @var{name}
+Make merged if-then-else output using @var{name}.  @xref{If-then-else}.
+
+@item --debug=@var{number}
+Set internal debugging flags.  Of interest only to @code{patch}
+patchers.
+
+@item -e
+@itemx --ed
+Interpret the patch file as an @code{ed} script.  @xref{patch Input}.
+
+@item -E
+Remove output files that are empty after the patches have been applied.
+@xref{Empty Files}.
+
+@item -f
+Assume that the user knows exactly what he or she is doing, and do not
+ask any questions.  @xref{patch Messages}.
+
+@item -F @var{lines}
+Set the maximum fuzz factor to @var{lines}.  @xref{Inexact}.
+
+@item --force
+Assume that the user knows exactly what he or she is doing, and do not
+ask any questions.  @xref{patch Messages}.
+
+@item --forward
+Ignore patches that @code{patch} thinks are reversed or already applied.
+See also @samp{-R}.  @xref{Reversed Patches}.
+
+@item --fuzz=@var{lines}
+Set the maximum fuzz factor to @var{lines}.  @xref{Inexact}.
+
+@item --help
+Print a summary of the options that @code{patch} recognizes, then exit.
+
+@item --ifdef=@var{name}
+Make merged if-then-else output using @var{name}.  @xref{If-then-else}.
+
+@item --ignore-white-space
+@itemx -l
+Let any sequence of white space in the patch file match any sequence of
+white space in the input file.  @xref{Changed White Space}.
+
+@item -n
+@itemx --normal
+Interpret the patch file as a normal diff.  @xref{patch Input}.
+
+@item -N
+Ignore patches that @code{patch} thinks are reversed or already applied.
+See also @samp{-R}.  @xref{Reversed Patches}.
+
+@item -o @var{output-file}
+@itemx --output=@var{output-file}
+Use @var{output-file} as the output file name.  @xref{patch Options}.
+
+@item -p@r{[}@var{number}@r{]}
+Set the file name strip count to @var{number}.  @xref{patch Directories}.
+
+@item --prefix=@var{backup-prefix}
+Use @var{backup-prefix} as a prefix to the backup file name.  If this
+option is specified, any @samp{-b} option is ignored.  @xref{Backups}.
+
+@item --quiet
+Work silently unless an error occurs.  @xref{patch Messages}.
+
+@item -r @var{reject-file}
+Use @var{reject-file} as the reject file name.  @xref{Rejects}.
+
+@item -R
+Assume that this patch was created with the old and new files swapped.
+@xref{Reversed Patches}.
+
+@item --reject-file=@var{reject-file}
+Use @var{reject-file} as the reject file name.  @xref{Rejects}.
+
+@item --remove-empty-files
+Remove output files that are empty after the patches have been applied.
+@xref{Empty Files}.
+
+@item --reverse
+Assume that this patch was created with the old and new files swapped.
+@xref{Reversed Patches}.
+
+@item -s
+Work silently unless an error occurs.  @xref{patch Messages}.
+
+@item -S
+Ignore this patch from the patch file, but continue looking for the next
+patch in the file.  @xref{Multiple Patches}.
+
+@item --silent
+Work silently unless an error occurs.  @xref{patch Messages}.
+
+@item --skip
+Ignore this patch from the patch file, but continue looking for the next
+patch in the file.  @xref{Multiple Patches}.
+
+@item --strip@r{[}=@var{number}@r{]}
+Set the file name strip count to @var{number}.  @xref{patch Directories}.
+
+@item --suffix=@var{backup-suffix}
+Use @var{backup-suffix} as the backup extension instead of
+@samp{.orig} or @samp{~}.  @xref{Backups}.
+
+@item -t
+Do not ask any questions.  @xref{patch Messages}.
+
+@item -u
+@itemx --unified
+Interpret the patch file as a unified diff.  @xref{patch Input}.
+
+@item -v
+Output the revision header and patch level of @code{patch}.
+
+@item -V @var{backup-style}
+Select the kind of backups to make.  @xref{Backups}.
+
+@item --version
+Output the revision header and patch level of @code{patch}, then exit.
+
+@item --version-control=@var{backup-style}
+Select the kind of backups to make.  @xref{Backups}.
+
+@item -x @var{number}
+Set internal debugging flags.  Of interest only to @code{patch}
+patchers.
+@end table
+
+@node Invoking sdiff, Incomplete Lines, Invoking patch, Top
+@chapter Invoking @code{sdiff}
+@cindex invoking @code{sdiff}
+@cindex @code{sdiff} invocation
+
+The @code{sdiff} command merges two files and interactively outputs the
+results.  Its arguments are as follows:
+
+@example
+sdiff -o @var{outfile} @var{options}@dots{} @var{from-file} @var{to-file}
+@end example
+
+This merges @var{from-file} with @var{to-file}, with output to @var{outfile}.
+If @var{from-file} is a directory and @var{to-file} is not, @code{sdiff}
+compares the file in @var{from-file} whose file name is that of @var{to-file},
+and vice versa.  @var{from-file} and @var{to-file} may not both be
+directories.
+
+@code{sdiff} options begin with @samp{-}, so normally @var{from-file}
+and @var{to-file} may not begin with @samp{-}.  However, @samp{--} as an
+argument by itself treats the remaining arguments as file names even if
+they begin with @samp{-}.  You may not use @samp{-} as an input file.
+
+An exit status of 0 means no differences were found, 1 means some
+differences were found, and 2 means trouble.
+
+@code{sdiff} without @samp{-o} (or @samp{--output}) produces a
+side-by-side difference.  This usage is obsolete; use @samp{diff
+--side-by-side} instead.
+
+@menu
+* sdiff Options::	Summary of options to @code{sdiff}.
+@end menu
+
+@node sdiff Options, , , Invoking sdiff
+@section Options to @code{sdiff}
+@cindex @code{sdiff} options
+@cindex options for @code{sdiff}
+
+Below is a summary of all of the options that GNU @code{sdiff} accepts.
+Each option has two equivalent names, one of which is a single
+letter preceded by @samp{-}, and the other of which is a long name
+preceded by @samp{--}.  Multiple single letter options (unless they take
+an argument) can be combined into a single command line argument.  Long
+named options can be abbreviated to any unique prefix of their name.
+
+@table @samp
+@item -a
+Treat all files as text and compare them line-by-line, even if they
+do not appear to be text.  @xref{Binary}.
+
+@item -b
+Ignore changes in amount of white space.  @xref{White Space}.
+
+@item -B
+Ignore changes that just insert or delete blank lines.  @xref{Blank
+Lines}.
+
+@item -d
+Change the algorithm to perhaps find a smaller set of changes.  This
+makes @code{sdiff} slower (sometimes much slower).  @xref{diff
+Performance}.
+
+@item -H
+Use heuristics to speed handling of large files that have numerous
+scattered small changes.  @xref{diff Performance}.
+
+@item --expand-tabs
+Expand tabs to spaces in the output, to preserve the alignment of tabs
+in the input files.  @xref{Tabs}.
+
+@item -i
+Ignore changes in case; consider upper- and lower-case to be the same.
+@xref{Case Folding}.
+
+@item -I @var{regexp}
+Ignore changes that just insert or delete lines that match @var{regexp}.
+@xref{Specified Folding}.
+
+@item --ignore-all-space
+Ignore white space when comparing lines.  @xref{White Space}.
+
+@item --ignore-blank-lines
+Ignore changes that just insert or delete blank lines.  @xref{Blank
+Lines}.
+
+@item --ignore-case
+Ignore changes in case; consider upper- and lower-case to be the same.
+@xref{Case Folding}.
+
+@item --ignore-matching-lines=@var{regexp}
+Ignore changes that just insert or delete lines that match @var{regexp}.
+@xref{Specified Folding}.
+
+@item --ignore-space-change
+Ignore changes in amount of white space.
+@xref{White Space}.
+
+@item -l
+@itemx --left-column
+Print only the left column of two common lines.
+@xref{Side by Side Format}.
+
+@item --minimal
+Change the algorithm to perhaps find a smaller set of changes.  This
+makes @code{sdiff} slower (sometimes much slower).  @xref{diff
+Performance}.
+
+@item -o @var{file}
+@itemx --output=@var{file}
+Put merged output into @var{file}.  This option is required for merging.
+
+@item -s
+@itemx --suppress-common-lines
+Do not print common lines.  @xref{Side by Side Format}.
+
+@item --speed-large-files
+Use heuristics to speed handling of large files that have numerous
+scattered small changes.  @xref{diff Performance}.
+
+@item -t
+Expand tabs to spaces in the output, to preserve the alignment of tabs
+in the input files.  @xref{Tabs}.
+
+@item --text
+Treat all files as text and compare them line-by-line, even if they
+do not appear to be text.  @xref{Binary}.
+
+@item -v
+@itemx --version
+Output the version number of @code{sdiff}.
+
+@item -w @var{columns}
+@itemx --width=@var{columns}
+Use an output width of @var{columns}.  @xref{Side by Side Format}.
+Note that for historical reasons, this option is @samp{-W} in @code{diff},
+@samp{-w} in @code{sdiff}.
+
+@item -W
+Ignore horizontal white space when comparing lines.  @xref{White Space}.
+Note that for historical reasons, this option is @samp{-w} in @code{diff},
+@samp{-W} in @code{sdiff}.
+@end table
+
+@node Incomplete Lines, Projects, Invoking sdiff, Top
+@chapter Incomplete Lines
+@cindex incomplete lines
+@cindex full lines
+@cindex newline treatment by @code{diff}
+
+When an input file ends in a non-newline character, its last line is
+called an @dfn{incomplete line} because its last character is not a
+newline.  All other lines are called @dfn{full lines} and end in a
+newline character.  Incomplete lines do not match full lines unless
+differences in white space are ignored (@pxref{White Space}).
+
+An incomplete line is normally distinguished on output from a full line
+by a following line that starts with @samp{\}.  However, the RCS format
+(@pxref{RCS}) outputs the incomplete line as-is, without any trailing
+newline or following line.  The side by side format normally represents
+incomplete lines as-is, but in some cases uses a @samp{\} or @samp{/}
+gutter marker; @xref{Side by Side}.  The if-then-else line format
+preserves a line's incompleteness with @samp{%L}, and discards the
+newline with @samp{%l}; @xref{Line Formats}.  Finally, with the
+@code{ed} and forward @code{ed} output formats (@pxref{Output Formats})
+@code{diff} cannot represent an incomplete line, so it pretends there
+was a newline and reports an error.
+
+For example, suppose @file{F} and @file{G} are one-byte files that
+contain just @samp{f} and @samp{g}, respectively.  Then @samp{diff F G}
+outputs
+
+@example
+1c1
+< f
+\ No newline at end of file
+---
+> g
+\ No newline at end of file
+@end example
+
+@noindent
+(The exact message may differ in non-English locales.)
+@samp{diff -n F G} outputs the following without a trailing newline:
+
+@example
+d1 1
+a1 1
+g
+@end example
+
+@samp{diff -e F G} reports two errors and outputs the following:
+
+@example
+1c
+g
+.
+@end example
+
+@node Projects, Concept Index, Incomplete Lines, Top
+@chapter Future Projects
+
+Here are some ideas for improving GNU @code{diff} and @code{patch}.  The
+GNU project has identified some improvements as potential programming
+projects for volunteers.  You can also help by reporting any bugs that
+you find.
+
+If you are a programmer and would like to contribute something to the
+GNU project, please consider volunteering for one of these projects.  If
+you are seriously contemplating work, please write to
+@samp{gnu@@prep.ai.mit.edu} to coordinate with other volunteers.
+
+@menu
+* Shortcomings::	Suggested projects for improvements.
+* Bugs::		Reporting bugs.
+@end menu
+
+@node Shortcomings, Bugs, , Projects
+@section Suggested Projects for Improving GNU @code{diff} and @code{patch}
+@cindex projects for directories
+
+One should be able to use GNU @code{diff} to generate a patch from any
+pair of directory trees, and given the patch and a copy of one such
+tree, use @code{patch} to generate a faithful copy of the other.
+Unfortunately, some changes to directory trees cannot be expressed using
+current patch formats; also, @code{patch} does not handle some of the
+existing formats.  These shortcomings motivate the following suggested
+projects.
+
+@menu
+* Changing Structure::	Handling changes to the directory structure.
+* Special Files::	Handling symbolic links, device special files, etc.
+* Unusual File Names::	Handling file names that contain unusual characters.
+* Arbitrary Limits::	Patching non-text files.
+* Large Files::		Handling files that do not fit in memory.
+* Ignoring Changes::	Ignoring certain changes while showing others.
+@end menu
+
+@node Changing Structure, Special Files, , Shortcomings
+@subsection Handling Changes to the Directory Structure
+@cindex directory structure changes
+
+@code{diff} and @code{patch} do not handle some changes to directory
+structure.  For example, suppose one directory tree contains a directory
+named @samp{D} with some subsidiary files, and another contains a file
+with the same name @samp{D}.  @samp{diff -r} does not output enough
+information for @code{patch} to transform the directory subtree into
+the file.
+
+There should be a way to specify that a file has been deleted without
+having to include its entire contents in the patch file.  There should
+also be a way to tell @code{patch} that a file was renamed, even if
+there is no way for @code{diff} to generate such information.
+
+These problems can be fixed by extending the @code{diff} output format
+to represent changes in directory structure, and extending @code{patch}
+to understand these extensions.
+
+@node Special Files, Unusual File Names, Changing Structure, Shortcomings
+@subsection Files that are Neither Directories Nor Regular Files
+@cindex special files
+
+Some files are neither directories nor regular files: they are unusual
+files like symbolic links, device special files, named pipes, and
+sockets.  Currently, @code{diff} treats symbolic links like regular files;
+it treats other special files like regular files if they are specified
+at the top level, but simply reports their presence when comparing
+directories.  This means that @code{patch} cannot represent changes
+to such files.  For example, if you change which file a symbolic link
+points to, @code{diff} outputs the difference between the two files,
+instead of the change to the symbolic link.
+
+@c This might not be a good idea; is it wise for root to install devices
+@c this way?
+@code{diff} should optionally report changes to special files specially,
+and @code{patch} should be extended to understand these extensions.
+
+@node Unusual File Names, Arbitrary Limits, Special Files, Shortcomings
+@subsection File Names that Contain Unusual Characters
+@cindex file names with unusual characters
+
+When a file name contains an unusual character like a newline or
+white space, @samp{diff -r} generates a patch that @code{patch} cannot
+parse.  The problem is with the format of @code{diff} output, not just with
+@code{patch}, because with odd enough file names one can cause
+@code{diff} to generate a patch that is syntactically correct but
+patches the wrong files.  The format of @code{diff} output should be
+extended to handle all possible file names.
+
+@node Arbitrary Limits, Large Files, Unusual File Names, Shortcomings
+@subsection Arbitrary Limits
+@cindex binary file patching
+
+GNU @code{diff} can analyze files with arbitrarily long lines and files
+that end in incomplete lines.  However, @code{patch} cannot patch such
+files.  The @code{patch} internal limits on line lengths should be
+removed, and @code{patch} should be extended to parse @code{diff}
+reports of incomplete lines.
+
+@node Large Files, Ignoring Changes, Arbitrary Limits, Shortcomings
+@subsection Handling Files that Do Not Fit in Memory
+@cindex large files
+
+@code{diff} operates by reading both files into memory.  This method
+fails if the files are too large, and @code{diff} should have a fallback.
+
+One way to do this is to scan the files sequentially to compute hash
+codes of the lines and put the lines in equivalence classes based only
+on hash code.  Then compare the files normally.  This does produce some
+false matches.
+
+Then scan the two files sequentially again, checking each match to see
+whether it is real.  When a match is not real, mark both the
+``matching'' lines as changed.  Then build an edit script as usual.
+
+The output routines would have to be changed to scan the files
+sequentially looking for the text to print.
+
+@node Ignoring Changes,, Large Files, Shortcomings
+@subsection Ignoring Certain Changes
+
+It would be nice to have a feature for specifying two strings, one in
+@var{from-file} and one in @var{to-file}, which should be considered to
+match.  Thus, if the two strings are @samp{foo} and @samp{bar}, then if
+two lines differ only in that @samp{foo} in file 1 corresponds to
+@samp{bar} in file 2, the lines are treated as identical.
+
+It is not clear how general this feature can or should be, or
+what syntax should be used for it.
+
+@node Bugs, , Shortcomings, Projects
+@section Reporting Bugs
+@cindex bug reports
+@cindex reporting bugs
+
+If you think you have found a bug in GNU @code{cmp}, @code{diff},
+@code{diff3}, @code{sdiff}, or @code{patch}, please report it by
+electronic mail to @samp{bug-gnu-utils@@prep.ai.mit.edu}.  Send as
+precise a description of the problem as you can, including sample input
+files that produce the bug, if applicable.
+
+Because Larry Wall has not released a new version of @code{patch} since
+mid 1988 and the GNU version of @code{patch} has been changed since
+then, please send bug reports for @code{patch} by electronic mail to
+both @samp{bug-gnu-utils@@prep.ai.mit.edu} and
+@samp{lwall@@netlabs.com}.
+
+@node Concept Index, , Projects, Top
+@unnumbered Concept Index
+
+@printindex cp
+
+@shortcontents
+@contents
+@bye

+ 1778 - 0
sys/src/ape/cmd/diff/diff3.c

@@ -0,0 +1,1778 @@
+/* Three way file comparison program (diff3) for Project GNU.
+   Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/* Written by Randy Smith */
+
+#include "system.h"
+#include <stdio.h>
+#include <signal.h>
+#include "getopt.h"
+
+extern char const version_string[];
+
+/*
+ * Internal data structures and macros for the diff3 program; includes
+ * data structures for both diff3 diffs and normal diffs.
+ */
+
+/* Different files within a three way diff.  */
+#define	FILE0	0
+#define	FILE1	1
+#define	FILE2	2
+
+/*
+ * A three way diff is built from two two-way diffs; the file which
+ * the two two-way diffs share is:
+ */
+#define	FILEC	FILE2
+
+/*
+ * Different files within a two way diff.
+ * FC is the common file, FO the other file.
+ */
+#define FO 0
+#define FC 1
+
+/* The ranges are indexed by */
+#define	START	0
+#define	END	1
+
+/* Classification of a difference.  ADD, CHANGE and DELETE describe
+   two-way diff blocks; the DIFF_* values are the ones
+   using_to_diff3_block stores in a three-way block.  */
+enum diff_type {
+  ERROR,			/* Should not be used (initial type given by create_diff3_block) */
+  ADD,				/* Two way diff add */
+  CHANGE,			/* Two way diff change */
+  DELETE,			/* Two way diff delete */
+  DIFF_ALL,			/* All three are different */
+  DIFF_1ST,			/* Only the first is different (only thread 0 supplied blocks) */
+  DIFF_2ND,			/* Only the second (only thread 1 supplied blocks) */
+  DIFF_3RD			/* Only the third (files 0 and 1 compare equal) */
+};
+
+/* Two way diff: one block of differences between the "other" file
+   (index FO) and the common file (index FC).  Blocks of one diff are
+   chained through NEXT.  */
+struct diff_block {
+  int ranges[2][2];		/* Ranges are inclusive; indexed [FO or FC][START or END] */
+  char **lines[2];		/* The actual lines (may contain nulls) */
+  size_t *lengths[2];		/* Line lengths (including newlines, if any) */
+  struct diff_block *next;	/* Next block in this diff, or 0 */
+};
+
+/* Three way diff: one block covering corresponding line ranges in
+   FILE0, FILE1 and FILE2 (FILE2 being the common file, FILEC).  */
+
+struct diff3_block {
+  enum diff_type correspond;	/* Type of diff */
+  int ranges[3][2];		/* Ranges are inclusive; indexed [FILE0..FILE2][START or END] */
+  char **lines[3];		/* The actual lines (may contain nulls) */
+  size_t *lengths[3];		/* Line lengths (including newlines, if any) */
+  struct diff3_block *next;	/* Next block in the three way diff, or 0 */
+};
+
+/*
+ * Access the ranges on a diff block.
+ */
+#define	D_LOWLINE(diff, filenum)	\
+  ((diff)->ranges[filenum][START])
+#define	D_HIGHLINE(diff, filenum)	\
+  ((diff)->ranges[filenum][END])
+#define	D_NUMLINES(diff, filenum)	\
+  (D_HIGHLINE (diff, filenum) - D_LOWLINE (diff, filenum) + 1)
+
+/*
+ * Access the line numbers in a file in a diff by relative line
+ * numbers (i.e. line number within the diff itself).  Note that these
+ * are lvalues and can be used for assignment.
+ */
+#define	D_RELNUM(diff, filenum, linenum)	\
+  ((diff)->lines[filenum][linenum])
+#define	D_RELLEN(diff, filenum, linenum)	\
+  ((diff)->lengths[filenum][linenum])
+
+/*
+ * And get at them directly, when that should be necessary.
+ */
+#define	D_LINEARRAY(diff, filenum)	\
+  ((diff)->lines[filenum])
+#define	D_LENARRAY(diff, filenum)	\
+  ((diff)->lengths[filenum])
+
+/*
+ * Next block.
+ */
+#define	D_NEXT(diff)	((diff)->next)
+
+/*
+ * Access the type of a diff3 block.
+ */
+#define	D3_TYPE(diff)	((diff)->correspond)
+
+/*
+ * Line mappings based on diffs.  Both translate LINENO, a line number
+ * in FROMFILE, into the corresponding line number in TOFILE.
+ * D_HIGH_MAPLINE keeps the line's offset from the last line (END) of
+ * the block DIFF; D_LOW_MAPLINE keeps its offset from the first line
+ * (START) of the block.
+ */
+#define	D_HIGH_MAPLINE(diff, fromfile, tofile, lineno)	\
+  ((lineno)						\
+   - D_HIGHLINE ((diff), (fromfile))			\
+   + D_HIGHLINE ((diff), (tofile)))
+
+#define	D_LOW_MAPLINE(diff, fromfile, tofile, lineno)	\
+  ((lineno)						\
+   - D_LOWLINE ((diff), (fromfile))			\
+   + D_LOWLINE ((diff), (tofile)))
+
+/*
+ * General memory allocation macro: obtain room for NUMBER objects of
+ * type TYPE from xmalloc and yield the result as a `TYPE *'.
+ * The whole expansion is parenthesized so that the cast binds as a
+ * unit wherever the macro appears inside a larger expression.
+ */
+#define	ALLOCATE(number, type)	\
+  ((type *) xmalloc ((number) * sizeof (type)))
+
+/* Options variables for flags set on command line.  */
+
+/* If nonzero, treat all files as text files, never as binary.  */
+static int always_text;
+
+/* If nonzero, write out an ed script instead of the standard diff3 format.  */
+static int edscript;
+
+/* If nonzero, in the case of overlapping diffs (type DIFF_ALL),
+   preserve the lines which would normally be deleted from
+   file 1 with a special flagging mechanism.  */
+static int flagging;
+
+/* Number of lines to keep in identical prefix and suffix.
+   main raises this to at least the size of the first and last blocks
+   of the first two-way diff.  */
+static int horizon_lines = 10;
+
+/* Use a tab to align output lines (-T).  */
+static int tab_align_flag;
+
+/* If nonzero, do not output information for overlapping diffs.  */
+static int simple_only;
+
+/* If nonzero, do not output information for non-overlapping diffs.  */
+static int overlap_only;
+
+/* If nonzero, show information for DIFF_2ND diffs.  */
+static int show_2nd;
+
+/* If nonzero (-i), append `w' and `q' commands at the end of the ed
+   script to write out the file being edited.   */
+static int finalwrite;
+
+/* If nonzero, output a merged file.  */
+static int merge;
+
+/* Name this program was invoked with (argv[0]); used as the prefix of
+   diagnostics and in the usage message.  */
+static char *program_name;
+
+/* Forward declarations of the file-local functions defined below.
+   Parameter lists are wrapped in PARAMS and generic pointers are
+   spelled VOID *; neither macro is defined in this file (presumably
+   they come from "system.h" -- confirm).  */
+static VOID *xmalloc PARAMS((size_t));
+static VOID *xrealloc PARAMS((VOID *, size_t));
+
+static char *read_diff PARAMS((char const *, char const *, char **));
+static char *scan_diff_line PARAMS((char *, char **, size_t *, char *, int));
+static enum diff_type process_diff_control PARAMS((char **, struct diff_block *));
+static int compare_line_list PARAMS((char * const[], size_t const[], char * const[], size_t const[], int));
+static int copy_stringlist PARAMS((char * const[], size_t const[], char *[], size_t[], int));
+static int dotlines PARAMS((FILE *, struct diff3_block *, int));
+static int output_diff3_edscript PARAMS((FILE *, struct diff3_block *, int const[3], int const[3], char const *, char const *, char const *));
+static int output_diff3_merge PARAMS((FILE *, FILE *, struct diff3_block *, int const[3], int const[3], char const *, char const *, char const *));
+static size_t myread PARAMS((int, char *, size_t));
+static struct diff3_block *create_diff3_block PARAMS((int, int, int, int, int, int));
+static struct diff3_block *make_3way_diff PARAMS((struct diff_block *, struct diff_block *));
+static struct diff3_block *reverse_diff3_blocklist PARAMS((struct diff3_block *));
+static struct diff3_block *using_to_diff3_block PARAMS((struct diff_block *[2], struct diff_block *[2], int, int, struct diff3_block const *));
+static struct diff_block *process_diff PARAMS((char const *, char const *, struct diff_block **));
+static void check_stdout PARAMS((void));
+static void fatal PARAMS((char const *));
+static void output_diff3 PARAMS((FILE *, struct diff3_block *, int const[3], int const[3]));
+static void perror_with_exit PARAMS((char const *));
+static void try_help PARAMS((char const *));
+static void undotlines PARAMS((FILE *, int, int, int));
+static void usage PARAMS((void));
+
+/* Name of the two-way diff program.  DIFF_PROGRAM is not defined in
+   this file; it is assumed to be supplied by a header or the build.  */
+static char const diff_program[] = DIFF_PROGRAM;
+
+/* Long option table handed to getopt_long in main.  The last field of
+   each entry is the value main's option switch receives: the matching
+   short option letter, or 129 for --help, which has no short form.
+   Only --label is marked as taking an argument.  */
+static struct option const longopts[] =
+{
+  {"text", 0, 0, 'a'},
+  {"show-all", 0, 0, 'A'},
+  {"ed", 0, 0, 'e'},
+  {"show-overlap", 0, 0, 'E'},
+  {"label", 1, 0, 'L'},
+  {"merge", 0, 0, 'm'},
+  {"initial-tab", 0, 0, 'T'},
+  {"overlap-only", 0, 0, 'x'},
+  {"easy-only", 0, 0, '3'},
+  {"version", 0, 0, 'v'},
+  {"help", 0, 0, 129},
+  {0, 0, 0, 0}
+};
+
+/*
+ * Main program.  Calls diff twice on two pairs of input files,
+ * combines the two diffs, and outputs them.
+ *
+ * Steps: parse and validate the options, pick which operand serves as
+ * the common file, check that the operands exist and are not
+ * directories, build the two two-way diffs with process_diff, merge
+ * them with make_3way_diff, and write the result in the selected
+ * format.  The exit status is the conflicts_found value (always 0 for
+ * the plain diff3 format); usage errors exit with 2 through try_help.
+ */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c, i;
+  /* mapping[] goes from operand position to internal file number;
+     rev_mapping[] is its inverse (internal number to operand position).  */
+  int mapping[3];
+  int rev_mapping[3];
+  /* Counts how many of -A, -e, -E, -x, -X, -3 were given.  */
+  int incompat = 0;
+  int conflicts_found;
+  struct diff_block *thread0, *thread1, *last_block;
+  struct diff3_block *diff3;
+  /* Labels given with -L; unfilled slots default to the file names.  */
+  int tag_count = 0;
+  char *tag_strings[3];
+  char *commonname;
+  char **file;
+  struct stat statb;
+
+  /* initialize_main is not defined in this file (presumably a
+     platform hook from "system.h" -- confirm).  */
+  initialize_main (&argc, &argv);
+  program_name = argv[0];
+
+  while ((c = getopt_long (argc, argv, "aeimvx3AEL:TX", longopts, 0)) != EOF)
+    {
+      switch (c)
+	{
+	case 'a':
+	  always_text = 1;
+	  break;
+	case 'A':
+	  show_2nd = 1;
+	  flagging = 1;
+	  incompat++;
+	  break;
+	case 'x':
+	  overlap_only = 1;
+	  incompat++;
+	  break;
+	case '3':
+	  simple_only = 1;
+	  incompat++;
+	  break;
+	case 'i':
+	  finalwrite = 1;
+	  break;
+	case 'm':
+	  merge = 1;
+	  break;
+	case 'X':
+	  overlap_only = 1;
+	  /* Falls through */
+	case 'E':
+	  flagging = 1;
+	  /* Falls through */
+	case 'e':
+	  incompat++;
+	  break;
+	case 'T':
+	  tab_align_flag = 1;
+	  break;
+	case 'v':
+	  printf ("diff3 - GNU diffutils version %s\n", version_string);
+	  exit (0);
+	case 129:
+	  /* --help */
+	  usage ();
+	  check_stdout ();
+	  exit (0);
+	case 'L':
+	  /* Handle up to three -L options.  */
+	  if (tag_count < 3)
+	    {
+	      tag_strings[tag_count++] = optarg;
+	      break;
+	    }
+	  /* try_help exits, so control never really reaches `default'.  */
+	  try_help ("Too many labels were given.  The limit is 3.");
+	default:
+	  try_help (0);
+	}
+    }
+
+  edscript = incompat & ~merge;  /* -AeExX3 without -m implies ed script.  */
+  show_2nd |= ~incompat & merge;  /* -m without -AeExX3 implies -A.  */
+  flagging |= ~incompat & merge;
+
+  if (incompat > 1  /* Ensure at most one of -AeExX3.  */
+      || finalwrite & merge /* -i -m would rewrite input file.  */
+      || (tag_count && ! flagging)) /* -L requires one of -AEX.  */
+    try_help ("incompatible options");
+
+  /* Exactly three file operands are required.  */
+  if (argc - optind != 3)
+    try_help (argc - optind < 3 ? "missing operand" : "extra operand");
+
+  file = &argv[optind];
+
+  /* Labels not supplied with -L default to the file names.  */
+  for (i = tag_count; i < 3; i++)
+    tag_strings[i] = file[i];
+
+  /* Always compare file1 to file2, even if file2 is "-".
+     This is needed for -mAeExX3.  Using the file0 as
+     the common file would produce wrong results, because if the
+     file0-file1 diffs didn't line up with the file0-file2 diffs
+     (which is entirely possible since we don't use diff's -n option),
+     diff3 might report phantom changes from file1 to file2.  */
+
+  if (strcmp (file[2], "-") == 0)
+    {
+      /* Sigh.  We've got standard input as the last arg.  We can't
+	 call diff twice on stdin.  Use the middle arg as the common
+	 file instead.  */
+      if (strcmp (file[0], "-") == 0 || strcmp (file[1], "-") == 0)
+	fatal ("`-' specified for more than one input file");
+      mapping[0] = 0;
+      mapping[1] = 2;
+      mapping[2] = 1;
+    }
+  else
+    {
+      /* Normal, what you'd expect */
+      mapping[0] = 0;
+      mapping[1] = 1;
+      mapping[2] = 2;
+    }
+
+  /* Build the inverse permutation.  */
+  for (i = 0; i < 3; i++)
+    rev_mapping[mapping[i]] = i;
+
+  /* Every operand other than "-" must exist and must not be a directory.  */
+  for (i = 0; i < 3; i++)
+    if (strcmp (file[i], "-") != 0)
+      {
+	if (stat (file[i], &statb) < 0)
+	  perror_with_exit (file[i]);
+	else if (S_ISDIR(statb.st_mode))
+	  {
+	    fprintf (stderr, "%s: %s: Is a directory\n",
+		     program_name, file[i]);
+	    exit (2);
+	  }
+      }
+
+#if !defined(SIGCHLD) && defined(SIGCLD)
+#define SIGCHLD SIGCLD
+#endif
+#ifdef SIGCHLD
+  /* System V fork+wait does not work if SIGCHLD is ignored.  */
+  signal (SIGCHLD, SIG_DFL);
+#endif
+
+  /* First diff: FILE1 against the common file.  Raise horizon_lines to
+     at least the size of the first and the last block of that diff
+     before running the second diff (FILE0 against the common file).  */
+  commonname = file[rev_mapping[FILEC]];
+  thread1 = process_diff (file[rev_mapping[FILE1]], commonname, &last_block);
+  if (thread1)
+    for (i = 0; i < 2; i++)
+      {
+	horizon_lines = max (horizon_lines, D_NUMLINES (thread1, i));
+	horizon_lines = max (horizon_lines, D_NUMLINES (last_block, i));
+      }
+  thread0 = process_diff (file[rev_mapping[FILE0]], commonname, &last_block);
+  diff3 = make_3way_diff (thread0, thread1);
+  if (edscript)
+    conflicts_found
+      = output_diff3_edscript (stdout, diff3, mapping, rev_mapping,
+			       tag_strings[0], tag_strings[1], tag_strings[2]);
+  else if (merge)
+    {
+      /* The merge output is produced while rereading FILE0, which is
+	 reopened on stdin here.  */
+      if (! freopen (file[rev_mapping[FILE0]], "r", stdin))
+	perror_with_exit (file[rev_mapping[FILE0]]);
+      conflicts_found
+	= output_diff3_merge (stdin, stdout, diff3, mapping, rev_mapping,
+			      tag_strings[0], tag_strings[1], tag_strings[2]);
+      if (ferror (stdin))
+	fatal ("read error");
+    }
+  else
+    {
+      output_diff3 (stdout, diff3, mapping, rev_mapping);
+      conflicts_found = 0;
+    }
+
+  check_stdout ();
+  exit (conflicts_found);
+  return conflicts_found;
+}
+
+/*
+ * Report a usage problem and exit with status 2.  When REASON is
+ * nonzero it is printed first, prefixed by the program name; a hint
+ * pointing at --help always follows.  Does not return.
+ */
+static void
+try_help (reason)
+     char const *reason;
+{
+  char const *prog = program_name;
+
+  if (reason != 0)
+    fprintf (stderr, "%s: %s\n", prog, reason);
+
+  fprintf (stderr, "%s: Try `%s --help' for more information.\n", prog, prog);
+  exit (2);
+}
+
+/*
+ * Close standard output and call fatal with "write error" if stdout
+ * already carries an error indication or if closing it fails.
+ */
+static void
+check_stdout ()
+{
+  int failed = ferror (stdout);
+
+  /* Only close the stream when no earlier error was recorded.  */
+  if (!failed)
+    failed = (fclose (stdout) != 0);
+
+  if (failed)
+    fatal ("write error");
+}
+
+/*
+ * Print the command synopsis followed by the option summary on
+ * standard output.  The option text lives in a small table and is
+ * written one chunk at a time.
+ */
+static void
+usage ()
+{
+  static char const * const option_help[] =
+  {
+    "\
+  -e  --ed  Output unmerged changes from OLDFILE to YOURFILE into MYFILE.\n\
+  -E  --show-overlap  Output unmerged changes, bracketing conflicts.\n\
+  -A  --show-all  Output all changes, bracketing conflicts.\n\
+  -x  --overlap-only  Output overlapping changes.\n\
+  -X  Output overlapping changes, bracketing them.\n\
+  -3  --easy-only  Output unmerged nonoverlapping changes.\n\n",
+    "\
+  -m  --merge  Output merged file instead of ed script (default -A).\n\
+  -L LABEL  --label=LABEL  Use LABEL instead of file name.\n\
+  -i  Append `w' and `q' commands to ed scripts.\n\
+  -a  --text  Treat all files as text.\n\
+  -T  --initial-tab  Make tabs line up by prepending a tab.\n\n",
+    "\
+  -v  --version  Output version info.\n\
+  --help  Output this help.\n\n"
+  };
+  size_t chunk;
+
+  printf ("Usage: %s [OPTION]... MYFILE OLDFILE YOURFILE\n\n", program_name);
+
+  for (chunk = 0; chunk < sizeof option_help / sizeof option_help[0]; chunk++)
+    printf ("%s", option_help[chunk]);
+
+  printf ("If a FILE is `-', read standard input.\n");
+}
+
+/*
+ * Routines that combine the two diffs together into one.  The
+ * algorithm used follows:
+ *
+ *   File2 is shared in common between the two diffs.
+ *   Diff02 is the diff between 0 and 2.
+ *   Diff12 is the diff between 1 and 2.
+ *
+ *	1) Find the range for the first block in File2.
+ *	    a) Take the lowest of the two ranges (in File2) in the two
+ *	       current blocks (one from each diff) as being the low
+ *	       water mark.  Assign the upper end of this block as
+ *	       being the high water mark and move the current block up
+ *	       one.  Mark the block just moved over as to be used.
+ *	    b) Check the next block in the diff that the high water
+ *	       mark is *not* from.
+ *
+ *	       *If* the high water mark is above
+ *	       the low end of the range in that block,
+ *
+ *		   mark that block as to be used and move the current
+ *		   block up.  Set the high water mark to the max of
+ *		   the high end of this block and the current.  Repeat b.
+ *
+ *	 2) Find the corresponding ranges in File0 (from the blocks
+ *	    in diff02; line per line outside of diffs) and in File1.
+ *	    Create a diff3_block, reserving space as indicated by the ranges.
+ *
+ *	 3) Copy all of the pointers for file2 in.  At least for now,
+ *	    do memcmp's between corresponding strings in the two diffs.
+ *
+ *	 4) Copy all of the pointers for file0 and 1 in.  Get what you
+ *	    need from file2 (when there isn't a diff block, it's
+ *	    identical to file2 within the range between diff blocks).
+ *
+ *	 5) If the diff blocks you used came from only one of the two
+ *	    strings of diffs, then that file (i.e. the one other than
+ *	    the common file in that diff) is the odd person out.  If you used
+ *	    diff blocks from both sets, check to see if files 0 and 1 match:
+ *
+ *		Same number of lines?  If so, do a set of memcmp's (if a
+ *	    memcmp matches; copy the pointer over; it'll be easier later
+ *	    if you have to do any compares).  If they match, 0 & 1 are
+ *	    the same.  If not, all three different.
+ *
+ *   Then you do it again, until you run out of blocks.
+ *
+ */
+
+/*
+ * This routine makes a three way diff (chain of diff3_block's) from two
+ * two way diffs (chains of diff_block's).  It is assumed that each of
+ * the two diffs passed are onto the same file (i.e. that each of the
+ * diffs were made "to" the same file).  The three way diff pointer
+ * returned will have numbering FILE0--the other file in diff02,
+ * FILE1--the other file in diff12, and FILEC--the common file.
+ */
+static struct diff3_block *
+make_3way_diff (thread0, thread1)
+     struct diff_block *thread0, *thread1;
+{
+/*
+ * This routine works on the two diffs passed to it as threads.
+ * Thread number 0 is diff02, thread number 1 is diff12.  The USING
+ * array is set to the base of the list of blocks to be used to
+ * construct each block of the three way diff; if no blocks from a
+ * particular thread are to be used, that element of the using array
+ * is set to 0.  The elements of the LAST_USING array are set to the
+ * last elements on each of the using lists.
+ *
+ * The HIGH_WATER_MARK is set to the highest line number in the common file
+ * described in any of the diffs in either of the USING lists.  The
+ * HIGH_WATER_THREAD names the thread.  Similarly the BASE_WATER_MARK
+ * and BASE_WATER_THREAD describe the lowest line number in the common file
+ * described in any of the diffs in either of the USING lists.  (Only
+ * BASE_WATER_THREAD is actually kept; the BASE_WATER_MARK assignment
+ * below is compiled out.)  The HIGH_WATER_DIFF is the diff from which
+ * the HIGH_WATER_MARK was taken.
+ *
+ * The HIGH_WATER_DIFF should always be equal to LAST_USING
+ * [HIGH_WATER_THREAD].  The OTHER_DIFF is the next diff to check for
+ * higher water, and should always be equal to
+ * CURRENT[HIGH_WATER_THREAD ^ 0x1].  The OTHER_THREAD is the thread
+ * in which the OTHER_DIFF is, and hence should always be equal to
+ * HIGH_WATER_THREAD ^ 0x1.
+ *
+ * The variable LAST_DIFF3 is kept set to the last diff block produced
+ * by this routine, for line correspondence purposes between that diff
+ * and the one currently being worked on.  It is initialized to
+ * ZERO_DIFF3 before any blocks have been created.
+ *
+ * Note that the input threads are consumed: blocks are unlinked from
+ * them (their NEXT pointers are overwritten) as they are moved onto
+ * the USING lists.
+ */
+
+  struct diff_block
+    *using[2],
+    *last_using[2],
+    *current[2];
+
+  int
+    high_water_mark;
+
+  int
+    high_water_thread,
+    base_water_thread,
+    other_thread;
+
+  struct diff_block
+    *high_water_diff,
+    *other_diff;
+
+  struct diff3_block
+    *result,
+    *tmpblock,
+    **result_end;
+
+  struct diff3_block const *last_diff3;
+
+  /* All-zero block standing in for "the previous diff3 block" on the
+     first iteration.  */
+  static struct diff3_block const zero_diff3;
+
+  /* Initialization */
+  result = 0;
+  result_end = &result;
+  current[0] = thread0; current[1] = thread1;
+  last_diff3 = &zero_diff3;
+
+  /* Sniff up the threads until we reach the end */
+
+  while (current[0] || current[1])
+    {
+      using[0] = using[1] = last_using[0] = last_using[1] = 0;
+
+      /* Setup low and high water threads, diffs, and marks.
+	 The base thread is the only non-empty one, or else the one
+	 whose current block starts lower in the common file (thread 0
+	 on a tie).  */
+      if (!current[0])
+	base_water_thread = 1;
+      else if (!current[1])
+	base_water_thread = 0;
+      else
+	base_water_thread =
+	  (D_LOWLINE (current[0], FC) > D_LOWLINE (current[1], FC));
+
+      high_water_thread = base_water_thread;
+
+      high_water_diff = current[high_water_thread];
+
+#if 0
+      /* low and high waters start off same diff */
+      base_water_mark = D_LOWLINE (high_water_diff, FC);
+#endif
+
+      high_water_mark = D_HIGHLINE (high_water_diff, FC);
+
+      /* Make the diff you just got info from into the using class */
+      using[high_water_thread]
+	= last_using[high_water_thread]
+	= high_water_diff;
+      current[high_water_thread] = high_water_diff->next;
+      last_using[high_water_thread]->next = 0;
+
+      /* And mark the other diff */
+      other_thread = high_water_thread ^ 0x1;
+      other_diff = current[other_thread];
+
+      /* Shuffle up the ladder, checking the other diff to see if it
+	 needs to be incorporated.  A block is incorporated when it
+	 starts no later than the line just past the high water mark,
+	 i.e. when it overlaps or is adjacent to what has been
+	 collected so far.  */
+      while (other_diff
+	     && D_LOWLINE (other_diff, FC) <= high_water_mark + 1)
+	{
+
+	  /* Incorporate this diff into the using list.  Note that
+	     this doesn't take it off the current list */
+	  if (using[other_thread])
+	    last_using[other_thread]->next = other_diff;
+	  else
+	    using[other_thread] = other_diff;
+	  last_using[other_thread] = other_diff;
+
+	  /* Take it off the current list.  Note that this following
+	     code assumes that other_diff enters it equal to
+	     current[high_water_thread ^ 0x1] */
+	  current[other_thread] = current[other_thread]->next;
+	  other_diff->next = 0;
+
+	  /* Set the high_water stuff
+	     If this comparison is equal, then this is the last pass
+	     through this loop; since diff blocks within a given
+	     thread cannot overlap, the high_water_mark will be
+	     *below* the range_start of either of the next diffs.  */
+
+	  if (high_water_mark < D_HIGHLINE (other_diff, FC))
+	    {
+	      high_water_thread ^= 1;
+	      high_water_diff = other_diff;
+	      high_water_mark = D_HIGHLINE (other_diff, FC);
+	    }
+
+	  /* Set the other diff */
+	  other_thread = high_water_thread ^ 0x1;
+	  other_diff = current[other_thread];
+	}
+
+      /* The using lists contain a list of all of the blocks to be
+	 included in this diff3_block.  Create it.  */
+
+      tmpblock = using_to_diff3_block (using, last_using,
+				       base_water_thread, high_water_thread,
+				       last_diff3);
+
+      /* A zero return means the two diffs disagreed about the
+	 contents of the common file.  */
+      if (!tmpblock)
+	fatal ("internal error: screwup in format of diff blocks");
+
+      /* Put it on the list.  */
+      *result_end = tmpblock;
+      result_end = &tmpblock->next;
+
+      /* Set up corresponding lines correctly.  */
+      last_diff3 = tmpblock;
+    }
+  return result;
+}
+
+/*
+ * using_to_diff3_block:
+ *   This routine takes two lists of blocks (from two separate diff
+ * threads) and puts them together into one diff3 block.
+ * It then returns a pointer to this diff3 block or 0 for failure.
+ *
+ * All arguments besides using are for the convenience of the routine;
+ * they could be derived from the using array.
+ * LAST_USING is a pair of pointers to the last blocks in the using
+ * structure.
+ * LOW_THREAD and HIGH_THREAD tell which threads contain the lowest
+ * and highest line numbers for the common file.
+ * last_diff3 contains the last diff produced in the calling routine.
+ * This is used for lines mappings which would still be identical to
+ * the state that diff ended in.
+ *
+ * A distinction should be made in this routine between the two diffs
+ * that are part of a normal two diff block, and the three diffs that
+ * are part of a diff3_block.
+ */
+static struct diff3_block *
+using_to_diff3_block (using, last_using, low_thread, high_thread, last_diff3)
+     struct diff_block
+       *using[2],
+       *last_using[2];
+     int low_thread, high_thread;
+     struct diff3_block const *last_diff3;
+{
+  /* low[d]/high[d]: inclusive line range in file D (FILE0 + d) that
+     corresponds to the common-file range lowc..highc.  */
+  int low[2], high[2];
+  struct diff3_block *result;
+  struct diff_block *ptr;
+  int d, i;
+
+  /* Find the range in the common file.  */
+  int lowc = D_LOWLINE (using[low_thread], FC);
+  int highc = D_HIGHLINE (last_using[high_thread], FC);
+
+  /* Find the ranges in the other files.
+     If using[d] is null, that means that the file to which that diff
+     refers is equivalent to the common file over this range.
+     In that case the mapping is taken relative to the end of the
+     previous diff3 block instead.  */
+
+  for (d = 0; d < 2; d++)
+    if (using[d])
+      {
+	low[d] = D_LOW_MAPLINE (using[d], FC, FO, lowc);
+	high[d] = D_HIGH_MAPLINE (last_using[d], FC, FO, highc);
+      }
+    else
+      {
+	low[d] = D_HIGH_MAPLINE (last_diff3, FILEC, FILE0 + d, lowc);
+	high[d] = D_HIGH_MAPLINE (last_diff3, FILEC, FILE0 + d, highc);
+      }
+
+  /* Create a block with the appropriate sizes */
+  result = create_diff3_block (low[0], high[0], low[1], high[1], lowc, highc);
+
+  /* Copy information for the common file.
+     Return with a zero if any of the compares failed.
+     Both threads describe the common file, so where their blocks
+     overlap copy_stringlist checks that they agree.  RESULT is not
+     released on the failure paths; the visible caller treats a zero
+     return as fatal.  */
+
+  for (d = 0; d < 2; d++)
+    for (ptr = using[d]; ptr; ptr = D_NEXT (ptr))
+      {
+	int result_offset = D_LOWLINE (ptr, FC) - lowc;
+
+	if (!copy_stringlist (D_LINEARRAY (ptr, FC),
+			      D_LENARRAY (ptr, FC),
+			      D_LINEARRAY (result, FILEC) + result_offset,
+			      D_LENARRAY (result, FILEC) + result_offset,
+			      D_NUMLINES (ptr, FC)))
+	  return 0;
+      }
+
+  /* Copy information for file d.  First deal with anything that might be
+     before the first diff.  */
+
+  for (d = 0; d < 2; d++)
+    {
+      struct diff_block *u = using[d];
+      int lo = low[d], hi = high[d];
+
+      /* Lines ahead of the first block of this thread (or the whole
+	 range, when the thread supplied no block) are taken from the
+	 common file at the same relative position.  */
+      for (i = 0;
+	   i + lo < (u ? D_LOWLINE (u, FO) : hi + 1);
+	   i++)
+	{
+	  D_RELNUM (result, FILE0 + d, i) = D_RELNUM (result, FILEC, i);
+	  D_RELLEN (result, FILE0 + d, i) = D_RELLEN (result, FILEC, i);
+	}
+
+      for (ptr = u; ptr; ptr = D_NEXT (ptr))
+	{
+	  int result_offset = D_LOWLINE (ptr, FO) - lo;
+	  int linec;
+
+	  /* The lines this block gives for file d itself.  */
+	  if (!copy_stringlist (D_LINEARRAY (ptr, FO),
+				D_LENARRAY (ptr, FO),
+				D_LINEARRAY (result, FILE0 + d) + result_offset,
+				D_LENARRAY (result, FILE0 + d) + result_offset,
+				D_NUMLINES (ptr, FO)))
+	    return 0;
+
+	  /* Catch the lines between here and the next diff.
+	     They are copied from the common file; LINEC walks the
+	     common-file index in step with I.  */
+	  linec = D_HIGHLINE (ptr, FC) + 1 - lowc;
+	  for (i = D_HIGHLINE (ptr, FO) + 1 - lo;
+	       i < (D_NEXT (ptr) ? D_LOWLINE (D_NEXT (ptr), FO) : hi + 1) - lo;
+	       i++)
+	    {
+	      D_RELNUM (result, FILE0 + d, i) = D_RELNUM (result, FILEC, linec);
+	      D_RELLEN (result, FILE0 + d, i) = D_RELLEN (result, FILEC, linec);
+	      linec++;
+	    }
+	}
+    }
+
+  /* Set correspond.
+     A thread that supplied no block matches the common file, so the
+     other file is the odd one out.  When both supplied blocks, files 0
+     and 1 are compared line by line: equal means only the third
+     (common) file differs, otherwise all three differ.  */
+  if (!using[0])
+    D3_TYPE (result) = DIFF_2ND;
+  else if (!using[1])
+    D3_TYPE (result) = DIFF_1ST;
+  else
+    {
+      int nl0 = D_NUMLINES (result, FILE0);
+      int nl1 = D_NUMLINES (result, FILE1);
+
+      if (nl0 != nl1
+	  || !compare_line_list (D_LINEARRAY (result, FILE0),
+				 D_LENARRAY (result, FILE0),
+				 D_LINEARRAY (result, FILE1),
+				 D_LENARRAY (result, FILE1),
+				 nl0))
+	D3_TYPE (result) = DIFF_ALL;
+      else
+	D3_TYPE (result) = DIFF_3RD;
+    }
+
+  return result;
+}
+
+/*
+ * Transfer COPYNUM line pointers and their lengths from the FROM arrays
+ * into the TO arrays.  A destination slot that is still null simply
+ * receives the source pointer and length.  A slot that is already
+ * occupied must hold text identical to the source line (same length,
+ * same bytes); the first mismatch abandons the copy and yields 0.
+ * Yields 1 once every slot has been filled or verified.
+ */
+static int
+copy_stringlist (fromptrs, fromlengths, toptrs, tolengths, copynum)
+     char * const fromptrs[];
+     char *toptrs[];
+     size_t const fromlengths[];
+     size_t tolengths[];
+     int copynum;
+{
+  int slot = 0;
+
+  while (copynum--)
+    {
+      if (toptrs[slot] == 0)
+	{
+	  /* Empty slot: adopt the source line as is.  */
+	  toptrs[slot] = fromptrs[slot];
+	  tolengths[slot] = fromlengths[slot];
+	}
+      else if (fromlengths[slot] != tolengths[slot]
+	       || memcmp (fromptrs[slot], toptrs[slot], fromlengths[slot]) != 0)
+	{
+	  /* Occupied slot disagrees with the source line.  */
+	  return 0;
+	}
+
+      slot++;
+    }
+
+  return 1;
+}
+
+/*
+ * Build a fresh diff3_block covering lines LOW0..HIGH0 of file 0,
+ * LOW1..HIGH1 of file 1 and LOW2..HIGH2 of file 2.  For every file
+ * whose line count is nonzero, a line-pointer array and a line-length
+ * array of that size are allocated and cleared; otherwise both arrays
+ * are set to null.  The block starts out typed ERROR with no successor
+ * and is handed back to the caller.
+ */
+static struct diff3_block *
+create_diff3_block (low0, high0, low1, high1, low2, high2)
+     register int low0, high0, low1, high1, low2, high2;
+{
+  struct diff3_block *blk = ALLOCATE (1, struct diff3_block);
+  int which[3];
+  int k;
+
+  which[0] = FILE0;
+  which[1] = FILE1;
+  which[2] = FILE2;
+
+  D3_TYPE (blk) = ERROR;
+  D_NEXT (blk) = 0;
+
+  /* Record the line range of each file.  */
+  D_LOWLINE (blk, FILE0) = low0;
+  D_HIGHLINE (blk, FILE0) = high0;
+  D_LOWLINE (blk, FILE1) = low1;
+  D_HIGHLINE (blk, FILE1) = high1;
+  D_LOWLINE (blk, FILE2) = low2;
+  D_HIGHLINE (blk, FILE2) = high2;
+
+  /* Give each file its zeroed pointer and length arrays, in file order.  */
+  for (k = 0; k < 3; k++)
+    {
+      int f = which[k];
+      int count = D_NUMLINES (blk, f);
+
+      if (count == 0)
+	{
+	  D_LINEARRAY (blk, f) = 0;
+	  D_LENARRAY (blk, f) = 0;
+	  continue;
+	}
+
+      D_LINEARRAY (blk, f) = ALLOCATE (count, char *);
+      D_LENARRAY (blk, f) = ALLOCATE (count, size_t);
+      bzero (D_LINEARRAY (blk, f), (count * sizeof (char *)));
+      bzero (D_LENARRAY (blk, f), (count * sizeof (size_t)));
+    }
+
+  return blk;
+}
+
+/*
+ * Decide whether two arrays of NL lines hold the same text.  Lines are
+ * matched pairwise: both pointers must be non-null, the recorded
+ * lengths must agree, and the bytes must be identical.
+ * Yields 1 when every pair matches, 0 at the first pair that does not.
+ */
+static int
+compare_line_list (list1, lengths1, list2, lengths2, nl)
+     char * const list1[], * const list2[];
+     size_t const lengths1[], lengths2[];
+     int nl;
+{
+  int k = 0;
+
+  while (nl--)
+    {
+      if (list1[k] == 0 || list2[k] == 0)
+	return 0;
+      if (lengths1[k] != lengths2[k])
+	return 0;
+      if (memcmp (list1[k], list2[k], lengths1[k]) != 0)
+	return 0;
+      k++;
+    }
+
+  return 1;
+}
+
+/*
+ * Routines to input and parse two way diffs.
+ *
+ * process_diff obtains the diff of FILEA and FILEB through read_diff and
+ * turns each hunk into a struct diff_block.  It returns the head of the
+ * resulting list (null if the diff text was empty) and stores the final
+ * block in *LAST_BLOCK (null likewise).  Malformed diff text terminates
+ * the program with status 2.
+ */
+
+extern char **environ;
+
+static struct diff_block *
+process_diff (filea, fileb, last_block)
+     char const *filea, *fileb;
+     struct diff_block **last_block;
+{
+  char *diff_contents;		/* start of the raw diff text */
+  char *diff_limit;		/* one past the end of the raw diff text */
+  char *scan_diff;		/* current parse position */
+  enum diff_type dt;
+  int i;
+  struct diff_block *block_list, **block_list_end, *bptr;
+
+  diff_limit = read_diff (filea, fileb, &diff_contents);
+  scan_diff = diff_contents;
+  block_list_end = &block_list;	/* append position; starts at the list head */
+  bptr = 0; /* Pacify `gcc -W'.  Also what *LAST_BLOCK gets for an empty diff.  */
+
+  while (scan_diff < diff_limit)
+    {
+      bptr = ALLOCATE (1, struct diff_block);
+      bptr->lines[0] = bptr->lines[1] = 0;
+      bptr->lengths[0] = bptr->lengths[1] = 0;
+
+      dt = process_diff_control (&scan_diff, bptr);
+      if (dt == ERROR || *scan_diff != '\n')
+	{
+	  /* Echo the offending text up to and including its newline.  */
+	  fprintf (stderr, "%s: diff error: ", program_name);
+	  do
+	    {
+	      putc (*scan_diff, stderr);
+	    }
+	  while (*scan_diff++ != '\n');
+	  exit (2);
+	}
+      scan_diff++;		/* step over the newline ending the control line */
+
+      /* Force appropriate ranges to be null, if necessary */
+      switch (dt)
+	{
+	case ADD:
+	  bptr->ranges[0][0]++;	/* no filea lines in an add */
+	  break;
+	case DELETE:
+	  bptr->ranges[1][0]++;	/* no fileb lines in a delete */
+	  break;
+	case CHANGE:
+	  break;
+	default:
+	  fatal ("internal error: invalid diff type in process_diff");
+	  break;
+	}
+
+      /* Allocate space for the pointers for the lines from filea, and
+	 parcel them out among these pointers */
+      if (dt != ADD)
+	{
+	  int numlines = D_NUMLINES (bptr, 0);
+	  bptr->lines[0] = ALLOCATE (numlines, char *);
+	  bptr->lengths[0] = ALLOCATE (numlines, size_t);
+	  for (i = 0; i < numlines; i++)
+	    scan_diff = scan_diff_line (scan_diff,
+					&(bptr->lines[0][i]),
+					&(bptr->lengths[0][i]),
+					diff_limit,
+					'<');
+	}
+
+      /* Get past the separator for changes */
+      if (dt == CHANGE)
+	{
+	  if (strncmp (scan_diff, "---\n", 4))
+	    fatal ("invalid diff format; invalid change separator");
+	  scan_diff += 4;
+	}
+
+      /* Allocate space for the pointers for the lines from fileb, and
+	 parcel them out among these pointers */
+      if (dt != DELETE)
+	{
+	  int numlines = D_NUMLINES (bptr, 1);
+	  bptr->lines[1] = ALLOCATE (numlines, char *);
+	  bptr->lengths[1] = ALLOCATE (numlines, size_t);
+	  for (i = 0; i < numlines; i++)
+	    scan_diff = scan_diff_line (scan_diff,
+					&(bptr->lines[1][i]),
+					&(bptr->lengths[1][i]),
+					diff_limit,
+					'>');
+	}
+
+      /* Place this block on the blocklist.  */
+      *block_list_end = bptr;
+      block_list_end = &bptr->next;
+    }
+
+  *block_list_end = 0;		/* terminate the list (or null the head if empty) */
+  *last_block = bptr;
+  return block_list;
+}
+
+/*
+ * This routine will parse a normal format diff control string.  It
+ * returns the type of the diff (ERROR if the format is bad).  All of
+ * the other important information is filled into the structure
+ * pointed to by db, and the string pointer (whose location is passed
+ * to this routine) is updated to point beyond the end of the string
+ * parsed.  Note that only the ranges in the diff_block will be set by
+ * this routine.
+ *
+ * If some specific pair of numbers has been reduced to a single
+ * number, then both corresponding numbers in the diff block are set
+ * to that number.  In general these numbers are interpreted as ranges
+ * inclusive, unless being used by the ADD or DELETE commands.  It is
+ * assumed that these will be special cased in a superior routine.
+ *
+ * On an ERROR return *STRING is left untouched, since it is only
+ * stored back after a successful parse; some of DB's ranges may
+ * already have been written by then.
+ */
+
+static enum diff_type
+process_diff_control (string, db)
+     char **string;
+     struct diff_block *db;
+{
+  char *s = *string;
+  int holdnum;
+  enum diff_type type;
+
+/* These macros are defined here because they can use variables
+   defined in this function.  Don't try this at home kids, we're
+   trained professionals!
+
+   Also note that SKIPWHITE only recognizes tabs and spaces, and
+   that READNUM can only read positive, integral numbers.  READNUM
+   makes the enclosing function return ERROR when no digit is present,
+   and accumulates into HOLDNUM with no overflow check.  */
+
+#define	SKIPWHITE(s)	{ while (*s == ' ' || *s == '\t') s++; }
+#define	READNUM(s, num)	\
+	{ unsigned char c = *s; if (!ISDIGIT (c)) return ERROR; holdnum = 0; \
+	  do { holdnum = (c - '0' + holdnum * 10); }	\
+	  while (ISDIGIT (c = *++s)); (num) = holdnum; }
+
+  /* Read first set of digits */
+  SKIPWHITE (s);
+  READNUM (s, db->ranges[0][START]);
+
+  /* Was that the only digit? */
+  SKIPWHITE (s);
+  if (*s == ',')
+    {
+      /* Get the next digit */
+      s++;
+      READNUM (s, db->ranges[0][END]);
+    }
+  else
+    db->ranges[0][END] = db->ranges[0][START];
+
+  /* Get the letter */
+  SKIPWHITE (s);
+  switch (*s)
+    {
+    case 'a':
+      type = ADD;
+      break;
+    case 'c':
+      type = CHANGE;
+      break;
+    case 'd':
+      type = DELETE;
+      break;
+    default:
+      return ERROR;			/* Bad format */
+    }
+  s++;				/* Past letter */
+
+  /* Read second set of digits */
+  SKIPWHITE (s);
+  READNUM (s, db->ranges[1][START]);
+
+  /* Was that the only digit? */
+  SKIPWHITE (s);
+  if (*s == ',')
+    {
+      /* Get the next digit */
+      s++;
+      READNUM (s, db->ranges[1][END]);
+      SKIPWHITE (s);		/* To move to end */
+    }
+  else
+    db->ranges[1][END] = db->ranges[1][START];
+
+  *string = s;
+  return type;
+}
+
+/*
+ * Run the subsidiary diff program on FILEA and FILEB and collect
+ * everything it writes to standard output in one heap buffer.
+ *
+ * The start of the buffer is stored in *OUTPUT_PLACEMENT; the return
+ * value points just past the last byte read.  With HAVE_FORK the child
+ * is started through pipe/fork/execve, otherwise through popen.
+ *
+ * The program exits with status 2 if the pipe, fork, read or wait
+ * fails, if the output is non-empty but does not end in a newline, or
+ * if diff does not exit normally with a status below 2.
+ */
+static char *
+read_diff (filea, fileb, output_placement)
+     char const *filea, *fileb;
+     char **output_placement;
+{
+  char *diff_result;
+  size_t bytes, current_chunk_size, total;
+  int fd, wstatus;
+  struct stat pipestat;
+
+  /* 302 / 1000 is log10(2.0) rounded up.  Subtract 1 for the sign bit;
+     add 1 for integer division truncation; add 1 more for a minus sign.  */
+#define INT_STRLEN_BOUND(type) ((sizeof(type)*CHAR_BIT - 1) * 302 / 1000 + 2)
+
+#if HAVE_FORK
+
+  char const *argv[7];
+  char horizon_arg[17 + INT_STRLEN_BOUND (int)];
+  char const **ap;
+  int fds[2];
+  pid_t pid;
+
+  /* Build the argument vector: at most six arguments plus the
+     terminating null, which is what argv[7] has room for.  */
+  ap = argv;
+  *ap++ = diff_program;
+  if (always_text)
+    *ap++ = "-a";
+  sprintf (horizon_arg, "--horizon-lines=%d", horizon_lines);
+  *ap++ = horizon_arg;
+  *ap++ = "--";
+  *ap++ = filea;
+  *ap++ = fileb;
+  *ap = 0;
+
+  if (pipe (fds) != 0)
+    perror_with_exit ("pipe");
+
+  pid = fork ();
+  if (pid == 0)
+    {
+      /* Child */
+      close (fds[0]);
+      if (fds[1] != STDOUT_FILENO)
+	{
+	  dup2 (fds[1], STDOUT_FILENO);
+	  close (fds[1]);
+	}
+      execve (diff_program, (char **) argv, environ);
+      /* Avoid stdio, because the parent process's buffers are inherited.  */
+      write (STDERR_FILENO, diff_program, strlen (diff_program));
+      write (STDERR_FILENO, ": not found\n", 12);
+      _exit (2);
+    }
+
+  if (pid == -1)
+    perror_with_exit ("fork failed");
+
+  close (fds[1]);		/* Prevent erroneous lack of EOF */
+  fd = fds[0];
+
+#else /* ! HAVE_FORK */
+
+  FILE *fpipe;
+  char *command = xmalloc (sizeof (diff_program) + 30 + INT_STRLEN_BOUND (int)
+			   + 4 * (strlen (filea) + strlen (fileb)));
+  char *p;
+  sprintf (command, "%s -a --horizon-lines=%d -- ",
+	   diff_program, horizon_lines);
+  p = command + strlen (command);
+  SYSTEM_QUOTE_ARG (p, filea);
+  *p++ = ' ';
+  SYSTEM_QUOTE_ARG (p, fileb);
+  *p = '\0';
+  fpipe = popen (command, "r");
+  if (!fpipe)
+    perror_with_exit (command);
+  free (command);
+  fd = fileno (fpipe);
+
+#endif /* ! HAVE_FORK */
+
+  /* Start with 8 KiB, or the pipe's preferred block size if larger.  */
+  current_chunk_size = 8 * 1024;
+  if (fstat (fd, &pipestat) == 0)
+    current_chunk_size = max (current_chunk_size, STAT_BLOCKSIZE (pipestat));
+
+  diff_result = xmalloc (current_chunk_size);
+  total = 0;
+  do {
+    bytes = myread (fd,
+		    diff_result + total,
+		    current_chunk_size - total);
+    total += bytes;
+    if (total == current_chunk_size)
+      {
+	/* Buffer is full: double it, saturating at the largest size_t
+	   value if doubling would wrap around.  */
+	if (current_chunk_size < 2 * current_chunk_size)
+	  current_chunk_size = 2 * current_chunk_size;
+	else if (current_chunk_size < (size_t) -1)
+	  current_chunk_size = (size_t) -1;
+	else
+	  fatal ("files are too large to fit into memory");
+	/* The new size was settled above.  It must not be doubled again
+	   here: that would bypass the overflow guard and could leave
+	   current_chunk_size wrapped to a value smaller than TOTAL.  */
+	diff_result = xrealloc (diff_result, current_chunk_size);
+      }
+  } while (bytes);
+
+  if (total != 0 && diff_result[total-1] != '\n')
+    fatal ("invalid diff format; incomplete last line");
+
+  *output_placement = diff_result;
+
+#if ! HAVE_FORK
+
+  wstatus = pclose (fpipe);
+
+#else /* HAVE_FORK */
+
+  if (close (fd) != 0)
+    perror_with_exit ("pipe close");
+  if (waitpid (pid, &wstatus, 0) < 0)
+    perror_with_exit ("waitpid failed");
+
+#endif /* HAVE_FORK */
+
+  /* Treat any exit status of 2 or more, or an abnormal exit, as failure.  */
+  if (! (WIFEXITED (wstatus) && WEXITSTATUS (wstatus) < 2))
+    fatal ("subsidiary diff failed");
+
+  return diff_result + total;
+}
+
+
+/*
+ * Consume one body line of a normal-format diff starting at SCAN_PTR.
+ * The line must open with LEADINGCHAR followed by a space, otherwise
+ * the program dies with a format error.  *SET_START receives the
+ * address of the text after that two-character prefix (the text may
+ * contain nulls) and *SET_LENGTH its length, newline included.
+ *
+ * If the next line (before LIMIT) begins with a backslash, it is taken
+ * as the message saying the line had no newline.  When an edit script
+ * is being generated the message is copied to stderr behind the program
+ * name and the length is left alone; otherwise the newline is dropped
+ * from *SET_LENGTH.  Either way the message line is skipped.
+ *
+ * Returns the address of the first line not consumed.
+ */
+static char *
+scan_diff_line (scan_ptr, set_start, set_length, limit, leadingchar)
+     char *scan_ptr, **set_start;
+     size_t *set_length;
+     char *limit;
+     int leadingchar;
+{
+  char *text;
+  char *cursor;
+
+  if (scan_ptr[0] != leadingchar
+      || scan_ptr[1] != ' ')
+    fatal ("invalid diff format; incorrect leading line chars");
+
+  text = scan_ptr + 2;
+  *set_start = text;
+
+  /* Find the newline that ends the line, then step past it.  */
+  for (cursor = text; *cursor != '\n'; cursor++)
+    continue;
+  cursor++;
+
+  *set_length = cursor - text;
+
+  if (cursor < limit && *cursor == '\\')
+    {
+      if (edscript)
+	fprintf (stderr, "%s:", program_name);
+      else
+	*set_length -= 1;
+
+      /* Walk over the rest of the message line, echoing it (newline
+	 included) when an edit script is being generated.  */
+      for (;;)
+	{
+	  cursor++;
+	  if (edscript)
+	    putc (*cursor, stderr);
+	  if (*cursor == '\n')
+	    break;
+	}
+      cursor++;
+    }
+
+  return cursor;
+}
+
+/*
+ * This routine outputs a three way diff passed as a list of
+ * diff3_block's.
+ * The argument MAPPING is indexed by external file number (in the
+ * argument list) and contains the internal file number (from the
+ * diff passed).  This is important because the user expects his
+ * outputs in terms of the argument list number, and the diff passed
+ * may have been done slightly differently (if the last argument
+ * was "-", for example).
+ * REV_MAPPING is the inverse of MAPPING.
+ *
+ * Each block is written to OUTPUTFILE as a "====" header (suffixed
+ * with the number of the odd file out, if there is one), followed for
+ * each of the three files by its line range and, unless suppressed,
+ * its lines prefixed by a tab or two spaces.
+ */
+static void
+output_diff3 (outputfile, diff, mapping, rev_mapping)
+     FILE *outputfile;
+     struct diff3_block *diff;
+     int const mapping[3], rev_mapping[3];
+{
+  int i;
+  int oddoneout;
+  char *cp;
+  struct diff3_block *ptr;
+  int line;
+  size_t length;
+  int dontprint;
+  static int skew_increment[3] = { 2, 3, 1 }; /* 0==>2==>1==>3 */
+  char const *line_prefix = tab_align_flag ? "\t" : "  ";
+
+  for (ptr = diff; ptr; ptr = D_NEXT (ptr))
+    {
+      char x[2];
+
+      switch (ptr->correspond)
+	{
+	case DIFF_ALL:
+	  x[0] = '\0';
+	  dontprint = 3;	/* Print them all */
+	  oddoneout = 3;	/* Nobody's odder than anyone else */
+	  break;
+	case DIFF_1ST:
+	case DIFF_2ND:
+	case DIFF_3RD:
+	  oddoneout = rev_mapping[(int) ptr->correspond - (int) DIFF_1ST];
+
+	  x[0] = oddoneout + '1';
+	  x[1] = '\0';
+	  dontprint = oddoneout==0;
+	  break;
+	default:
+	  fatal ("internal error: invalid diff type passed to output");
+	}
+      fprintf (outputfile, "====%s\n", x);
+
+      /* Go 0, 2, 1 if the first and third outputs are equivalent.  */
+      for (i = 0; i < 3;
+	   i = (oddoneout == 1 ? skew_increment[i] : i + 1))
+	{
+	  int realfile = mapping[i];
+	  int
+	    lowt = D_LOWLINE (ptr, realfile),
+	    hight = D_HIGHLINE (ptr, realfile);
+
+	  /* Range header: "Na" for an empty range, "Nc" for one line,
+	     "N,Mc" otherwise.  */
+	  fprintf (outputfile, "%d:", i + 1);
+	  switch (lowt - hight)
+	    {
+	    case 1:
+	      fprintf (outputfile, "%da\n", lowt - 1);
+	      break;
+	    case 0:
+	      fprintf (outputfile, "%dc\n", lowt);
+	      break;
+	    default:
+	      fprintf (outputfile, "%d,%dc\n", lowt, hight);
+	      break;
+	    }
+
+	  if (i == dontprint) continue;
+
+	  if (lowt <= hight)
+	    {
+	      line = 0;
+	      do
+		{
+		  /* The prefix is data, not a format: write it with fputs
+		     so it is never interpreted as a printf format.  */
+		  fputs (line_prefix, outputfile);
+		  cp = D_RELNUM (ptr, realfile, line);
+		  length = D_RELLEN (ptr, realfile, line);
+		  fwrite (cp, sizeof (char), length, outputfile);
+		}
+	      while (++line < hight - lowt + 1);
+	      /* CP and LENGTH now describe the last line written.  */
+	      if (cp[length - 1] != '\n')
+		fprintf (outputfile, "\n\\ No newline at end of file\n");
+	    }
+	}
+    }
+}
+
+
+/*
+ * Write to OUTPUTFILE every line that block B holds for file FILENUM.
+ * A line whose first character is '.' gets an extra '.' written in
+ * front of it.  The result is nonzero if at least one line was
+ * protected that way, zero otherwise.
+ */
+static int
+dotlines (outputfile, b, filenum)
+     FILE *outputfile;
+     struct diff3_block *b;
+     int filenum;
+{
+  int saw_dot = 0;
+  int count = D_NUMLINES (b, filenum);
+  int k = 0;
+
+  while (k < count)
+    {
+      char *text = D_RELNUM (b, filenum, k);
+      size_t len = D_RELLEN (b, filenum, k);
+
+      if (*text == '.')
+	{
+	  saw_dot = 1;
+	  fprintf (outputfile, ".");
+	}
+      fwrite (text, sizeof (char), len, outputfile);
+      k++;
+    }
+
+  return saw_dot;
+}
+
+/*
+ * Write a line holding only '.' to OUTPUTFILE.  When LEADING_DOT is
+ * nonzero, follow it with a substitute command that strips one initial
+ * '.' from each of the NUM lines beginning at line START; a single
+ * line is addressed as "START", several as "START,END".
+ */
+static void
+undotlines (outputfile, leading_dot, start, num)
+     FILE *outputfile;
+     int leading_dot, start, num;
+{
+  fprintf (outputfile, ".\n");
+
+  if (!leading_dot)
+    return;
+
+  if (num != 1)
+    {
+      int last = start + num - 1;
+      fprintf (outputfile, "%d,%ds/^\\.//\n", start, last);
+    }
+  else
+    {
+      fprintf (outputfile, "%ds/^\\.//\n", start);
+    }
+}
+
+/*
+ * This routine outputs a diff3 set of blocks as an ed script.  This
+ * script applies the changes between files 2 & 3 to file 1.  It
+ * takes the precise format of the ed script to be output from global
+ * variables set during options processing.  Note that it does
+ * destructive things to the set of diff3 blocks it is passed; it
+ * reverses their order (this gets around the problems involved with
+ * changing line numbers in an ed script).
+ *
+ * Note that this routine has the same problem of mapping as the last
+ * one did; the variable MAPPING maps from file number according to
+ * the argument list to file number according to the diff passed.  All
+ * files listed below are in terms of the argument list.
+ * REV_MAPPING is the inverse of MAPPING.
+ *
+ * The arguments FILE0, FILE1 and FILE2 are the strings to print
+ * as the names of the three files.  These may be the actual names,
+ * or may be the arguments specified with -L.
+ *
+ * Returns 1 if conflicts were found, 0 otherwise.
+ */
+
+static int
+output_diff3_edscript (outputfile, diff, mapping, rev_mapping,
+		       file0, file1, file2)
+     FILE *outputfile;
+     struct diff3_block *diff;
+     int const mapping[3], rev_mapping[3];
+     char const *file0, *file1, *file2;
+{
+  int leading_dot;
+  int conflicts_found = 0, conflict;
+  struct diff3_block *b;
+
+  for (b = reverse_diff3_blocklist (diff); b; b = b->next)
+    {
+      /* Must do mapping correctly.  */
+      enum diff_type type
+	= ((b->correspond == DIFF_ALL) ?
+	   DIFF_ALL :
+	   ((enum diff_type)
+	    (((int) DIFF_1ST)
+	     + rev_mapping[(int) b->correspond - (int) DIFF_1ST])));
+
+      /* If we aren't supposed to do this output block, skip it.  */
+      switch (type)
+	{
+	default: continue;
+	case DIFF_2ND: if (!show_2nd) continue; conflict = 1; break;
+	case DIFF_3RD: if (overlap_only) continue; conflict = 0; break;
+	case DIFF_ALL: if (simple_only) continue; conflict = flagging; break;
+	}
+
+      if (conflict)
+	{
+	  conflicts_found = 1;
+
+
+	  /* Mark end of conflict.  */
+
+	  fprintf (outputfile, "%da\n", D_HIGHLINE (b, mapping[FILE0]));
+	  leading_dot = 0;
+	  if (type == DIFF_ALL)
+	    {
+	      if (show_2nd)
+		{
+		  /* Append lines from FILE1.  */
+		  fprintf (outputfile, "||||||| %s\n", file1);
+		  leading_dot = dotlines (outputfile, b, mapping[FILE1]);
+		}
+	      /* Append lines from FILE2.  */
+	      fprintf (outputfile, "=======\n");
+	      leading_dot |= dotlines (outputfile, b, mapping[FILE2]);
+	    }
+	  fprintf (outputfile, ">>>>>>> %s\n", file2);
+	  undotlines (outputfile, leading_dot,
+		      D_HIGHLINE (b, mapping[FILE0]) + 2,
+		      (D_NUMLINES (b, mapping[FILE1])
+		       + D_NUMLINES (b, mapping[FILE2]) + 1));
+
+
+	  /* Mark start of conflict.  */
+
+	  fprintf (outputfile, "%da\n<<<<<<< %s\n",
+		   D_LOWLINE (b, mapping[FILE0]) - 1,
+		   type == DIFF_ALL ? file0 : file1);
+	  leading_dot = 0;
+	  if (type == DIFF_2ND)
+	    {
+	      /* Prepend lines from FILE1.  */
+	      leading_dot = dotlines (outputfile, b, mapping[FILE1]);
+	      fprintf (outputfile, "=======\n");
+	    }
+	  undotlines (outputfile, leading_dot,
+		      D_LOWLINE (b, mapping[FILE0]) + 1,
+		      D_NUMLINES (b, mapping[FILE1]));
+	}
+      else if (D_NUMLINES (b, mapping[FILE2]) == 0)
+	/* Write out a delete */
+	{
+	  if (D_NUMLINES (b, mapping[FILE0]) == 1)
+	    fprintf (outputfile, "%dd\n",
+		     D_LOWLINE (b, mapping[FILE0]));
+	  else
+	    fprintf (outputfile, "%d,%dd\n",
+		     D_LOWLINE (b, mapping[FILE0]),
+		     D_HIGHLINE (b, mapping[FILE0]));
+	}
+      else
+	/* Write out an add or change */
+	{
+	  switch (D_NUMLINES (b, mapping[FILE0]))
+	    {
+	    case 0:		/* nothing to replace in file 0: append */
+	      fprintf (outputfile, "%da\n",
+		       D_HIGHLINE (b, mapping[FILE0]));
+	      break;
+	    case 1:		/* single-line change */
+	      fprintf (outputfile, "%dc\n",
+		       D_HIGHLINE (b, mapping[FILE0]));
+	      break;
+	    default:		/* multi-line change */
+	      fprintf (outputfile, "%d,%dc\n",
+		       D_LOWLINE (b, mapping[FILE0]),
+		       D_HIGHLINE (b, mapping[FILE0]));
+	      break;
+	    }
+
+	  /* Emit the FILE2 lines, then the terminating '.' line (plus the
+	     un-dotting command if any line had its leading '.' doubled).  */
+	  undotlines (outputfile, dotlines (outputfile, b, mapping[FILE2]),
+		      D_LOWLINE (b, mapping[FILE0]),
+		      D_NUMLINES (b, mapping[FILE2]));
+	}
+    }
+  if (finalwrite) fprintf (outputfile, "w\nq\n");
+  return conflicts_found;
+}
+
+/*
+ * Read from INFILE and output to OUTPUTFILE a set of diff3_ blocks DIFF
+ * as a merged file.  This acts like 'ed file0 <[output_diff3_edscript]',
+ * except that it works even for binary data or incomplete lines.
+ *
+ * As before, MAPPING maps from arg list file number to diff file number,
+ * REV_MAPPING is its inverse,
+ * and FILE0, FILE1, and FILE2 are the names of the files.
+ *
+ * Returns 1 if conflicts were found, 0 otherwise.  A read error on
+ * INFILE, or INFILE ending earlier than the blocks require, terminates
+ * the program.
+ */
+
+static int
+output_diff3_merge (infile, outputfile, diff, mapping, rev_mapping,
+		    file0, file1, file2)
+     FILE *infile, *outputfile;
+     struct diff3_block *diff;
+     int const mapping[3], rev_mapping[3];
+     char const *file0, *file1, *file2;
+{
+  int c, i;
+  int conflicts_found = 0, conflict;
+  struct diff3_block *b;
+  int linesread = 0;		/* lines of INFILE consumed so far */
+
+  for (b = diff; b; b = b->next)
+    {
+      /* Must do mapping correctly.  */
+      enum diff_type type
+	= ((b->correspond == DIFF_ALL) ?
+	   DIFF_ALL :
+	   ((enum diff_type)
+	    (((int) DIFF_1ST)
+	     + rev_mapping[(int) b->correspond - (int) DIFF_1ST])));
+      char const *format_2nd = "<<<<<<< %s\n";
+
+      /* If we aren't supposed to do this output block, skip it.  */
+      switch (type)
+	{
+	default: continue;
+	case DIFF_2ND: if (!show_2nd) continue; conflict = 1; break;
+	case DIFF_3RD: if (overlap_only) continue; conflict = 0; break;
+	case DIFF_ALL: if (simple_only) continue; conflict = flagging;
+	  format_2nd = "||||||| %s\n";
+	  break;
+	}
+
+      /* Copy I lines from file 0.
+	 NOTE(review): this and the skip below index the block with FILE0
+	 directly, while the rest of the function uses mapping[FILE0];
+	 presumably callers guarantee the two are equal -- confirm.  */
+      i = D_LOWLINE (b, FILE0) - linesread - 1;
+      linesread += i;
+      while (0 <= --i)
+	do
+	  {
+	    c = getc (infile);
+	    if (c == EOF)
+	      if (ferror (infile))
+		perror_with_exit ("input file");
+	      else if (feof (infile))
+		fatal ("input file shrank");
+	    putc (c, outputfile);
+	  }
+	while (c != '\n');
+
+      if (conflict)
+	{
+	  conflicts_found = 1;
+
+	  if (type == DIFF_ALL)
+	    {
+	      /* Put in lines from FILE0 with bracket.  */
+	      fprintf (outputfile, "<<<<<<< %s\n", file0);
+	      for (i = 0;
+		   i < D_NUMLINES (b, mapping[FILE0]);
+		   i++)
+		fwrite (D_RELNUM (b, mapping[FILE0], i), sizeof (char),
+			D_RELLEN (b, mapping[FILE0], i), outputfile);
+	    }
+
+	  if (show_2nd)
+	    {
+	      /* Put in lines from FILE1 with bracket.  */
+	      fprintf (outputfile, format_2nd, file1);
+	      for (i = 0;
+		   i < D_NUMLINES (b, mapping[FILE1]);
+		   i++)
+		fwrite (D_RELNUM (b, mapping[FILE1], i), sizeof (char),
+			D_RELLEN (b, mapping[FILE1], i), outputfile);
+	    }
+
+	  fprintf (outputfile, "=======\n");
+	}
+
+      /* Put in lines from FILE2.  */
+      for (i = 0;
+	   i < D_NUMLINES (b, mapping[FILE2]);
+	   i++)
+	fwrite (D_RELNUM (b, mapping[FILE2], i), sizeof (char),
+		D_RELLEN (b, mapping[FILE2], i), outputfile);
+
+      if (conflict)
+	fprintf (outputfile, ">>>>>>> %s\n", file2);
+
+      /* Skip I lines in file 0.  Hitting end of file here is tolerated
+	 only on the last line of the last block; the function then
+	 returns at once, as nothing is left to copy.  */
+      i = D_NUMLINES (b, FILE0);
+      linesread += i;
+      while (0 <= --i)
+	while ((c = getc (infile)) != '\n')
+	  if (c == EOF)
+	    if (ferror (infile))
+	      perror_with_exit ("input file");
+	    else if (feof (infile))
+	      {
+		if (i || b->next)
+		  fatal ("input file shrank");
+		return conflicts_found;
+	      }
+    }
+  /* Copy rest of common file.  The loop stops only when getc yields EOF
+     and the stream reports an error or end of file.  */
+  while ((c = getc (infile)) != EOF || !(ferror (infile) | feof (infile)))
+    putc (c, outputfile);
+  return conflicts_found;
+}
+
+/*
+ * Reverse, in place, the singly linked list of diff3 blocks headed by
+ * DIFF and return the new head (the former last block, or null for an
+ * empty list).
+ */
+static struct diff3_block *
+reverse_diff3_blocklist (diff)
+     struct diff3_block *diff;
+{
+  struct diff3_block *reversed = 0;
+
+  while (diff)
+    {
+      /* Detach the head of the remaining list and push it onto the
+	 front of the reversed list.  */
+      struct diff3_block *rest = diff->next;
+
+      diff->next = reversed;
+      reversed = diff;
+      diff = rest;
+    }
+
+  return reversed;
+}
+
+/* Read up to SIZE bytes from descriptor FD into PTR and return the
+   number of bytes obtained.  A failed read terminates the program
+   through perror_with_exit.  */
+static size_t
+myread (fd, ptr, size)
+     int fd;
+     char *ptr;
+     size_t size;
+{
+  size_t nread = read (fd, ptr, size);
+
+  /* read reports failure as -1, which shows up here as the largest
+     size_t value once stored in NREAD.  */
+  if (nread == (size_t) -1)
+    perror_with_exit ("read failed");
+
+  return nread;
+}
+
+/* Allocate SIZE bytes (at least one, even when SIZE is zero) and
+   return the block.  Terminates the program if malloc fails.  */
+static VOID *
+xmalloc (size)
+     size_t size;
+{
+  VOID *block;
+
+  if (size == 0)
+    size = 1;
+
+  block = (VOID *) malloc (size);
+  if (block == 0)
+    fatal ("memory exhausted");
+
+  return block;
+}
+
+/* Resize the block PTR to SIZE bytes (at least one, even when SIZE is
+   zero) and return the possibly moved block.  Terminates the program if
+   realloc fails.  */
+static VOID *
+xrealloc (ptr, size)
+     VOID *ptr;
+     size_t size;
+{
+  VOID *block;
+
+  if (size == 0)
+    size = 1;
+
+  block = (VOID *) realloc (ptr, size);
+  if (block == 0)
+    fatal ("memory exhausted");
+
+  return block;
+}
+
+/* Print the message STRING on stderr, prefixed by the program name, and
+   terminate with exit status 2.  Does not return.  */
+static void
+fatal (string)
+     char const *string;
+{
+  char const *who = program_name;
+
+  fprintf (stderr, "%s: %s\n", who, string);
+  exit (2);
+}
+
+static void
+perror_with_exit (string)
+     char const *string;
+{
+  int e = errno;
+  fprintf (stderr, "%s: ", program_name);
+  errno = e;
+  perror (string);
+  exit (2);
+}

+ 216 - 0
sys/src/ape/cmd/diff/dir.c

@@ -0,0 +1,216 @@
+/* Read, sort and compare two directories.  Used for GNU DIFF.
+   Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU DIFF; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#include "diff.h"
+
+/* Read the directory named by DIR and store into DIRDATA a sorted vector
+   of filenames for its contents.  DIR->desc == -1 means this directory is
+   known to be nonexistent, so set DIRDATA to an empty vector.
+   Return -1 (setting errno) if error, 0 otherwise.
+
+   (This describes dir_sort, defined below after struct dirdata.)
+   The entries `.' and `..' and any name rejected by excluded_filename
+   are left out.  On an error return DIRDATA->data may already be
+   allocated while DIRDATA->names is still null; diff_dirs frees
+   whatever is non-null.  */
+
+struct dirdata
+{
+  char const **names;	/* Sorted names of files in dir, 0-terminated; each points into DATA.  */
+  char *data;	/* Allocated storage for file names, stored back to back with their null bytes.  */
+};
+
+static int compare_names PARAMS((void const *, void const *));
+static int dir_sort PARAMS((struct file_data const *, struct dirdata *));
+
+static int
+dir_sort (dir, dirdata)
+     struct file_data const *dir;
+     struct dirdata *dirdata;
+{
+  register struct dirent *next;
+  register int i;
+
+  /* Address of block containing the files that are described.  */
+  char const **names;
+
+  /* Number of files in directory.  */
+  size_t nnames;
+
+  /* Allocated and used storage for file name data.  */
+  char *data;
+  size_t data_alloc, data_used;
+
+  dirdata->names = 0;
+  dirdata->data = 0;
+  nnames = 0;
+  data = 0;
+
+  if (dir->desc != -1)
+    {
+      /* Open the directory and check for errors.  */
+      register DIR *reading = opendir (dir->name);
+      if (!reading)
+	return -1;
+
+      /* Initialize the table of filenames.  The directory's st_size is
+	 only the initial size (at least 1, so doubling below always
+	 grows it).  */
+
+      data_alloc = max (1, (size_t) dir->stat.st_size);
+      data_used = 0;
+      dirdata->data = data = xmalloc (data_alloc);
+
+      /* Read the directory entries, and insert the subfiles
+	 into the `data' table.  errno is cleared before every readdir
+	 call so that a null result can be told apart as end-of-directory
+	 (errno still 0) or failure (errno set).  */
+
+      while ((errno = 0, (next = readdir (reading)) != 0))
+	{
+	  char *d_name = next->d_name;
+	  size_t d_size = NAMLEN (next) + 1;	/* name plus its null byte */
+
+	  /* Ignore the files `.' and `..' */
+	  if (d_name[0] == '.'
+	      && (d_name[1] == 0 || (d_name[1] == '.' && d_name[2] == 0)))
+	    continue;
+
+	  if (excluded_filename (d_name))
+	    continue;
+
+	  /* Grow the storage until the new name fits, then append it.  */
+	  while (data_alloc < data_used + d_size)
+	    dirdata->data = data = xrealloc (data, data_alloc *= 2);
+	  memcpy (data + data_used, d_name, d_size);
+	  data_used += d_size;
+	  nnames++;
+	}
+      if (errno)
+	{
+	  /* Preserve readdir's errno across closedir for the caller.  */
+	  int e = errno;
+	  closedir (reading);
+	  errno = e;
+	  return -1;
+	}
+#if CLOSEDIR_VOID
+      closedir (reading);
+#else
+      if (closedir (reading) != 0)
+	return -1;
+#endif
+    }
+
+  /* Create the `names' table from the `data' table.  The local `data'
+     pointer is walked forward here; dirdata->data keeps the start of
+     the storage.  */
+  dirdata->names = names = (char const **) xmalloc (sizeof (char *)
+						    * (nnames + 1));
+  for (i = 0;  i < nnames;  i++)
+    {
+      names[i] = data;
+      data += strlen (data) + 1;
+    }
+  names[nnames] = 0;
+
+  /* Sort the table.  */
+  qsort (names, nnames, sizeof (char *), compare_names);
+
+  return 0;
+}
+
+/* Comparison callback handed to qsort by dir_sort.  FILE1 and FILE2
+   each point at one element of the names table, i.e. at a
+   `char const *'; the two names are ordered by filename_cmp.  */
+
+static int
+compare_names (file1, file2)
+     void const *file1, *file2;
+{
+  char const *const *lhs = (char const *const *) file1;
+  char const *const *rhs = (char const *const *) file2;
+
+  return filename_cmp (*lhs, *rhs);
+}
+
+/* Compare the contents of two directories named in FILEVEC[0] and FILEVEC[1].
+   This is a top-level routine; it does everything necessary for diff
+   on two directories.
+
+   FILEVEC[0].desc == -1 says directory FILEVEC[0] doesn't exist,
+   but pretend it is empty.  Likewise for FILEVEC[1].
+
+   HANDLE_FILE is a caller-provided subroutine called to handle each file.
+   It gets five operands: dir and name (rel to original working dir) of file
+   in dir 0, dir and name pathname of file in dir 1, and the recursion depth.
+
+   For a file that appears in only one of the dirs, one of the name-args
+   to HANDLE_FILE is zero.
+
+   DEPTH is the current depth in recursion, used for skipping top-level
+   files by the -S option.
+
+   Returns the maximum of all the values returned by HANDLE_FILE,
+   or 2 if trouble is encountered in opening files.  In the latter case
+   the error is reported with perror_with_name and HANDLE_FILE is not
+   called at all.  */
+
+int
+diff_dirs (filevec, handle_file, depth)
+     struct file_data const filevec[];
+     int (*handle_file) PARAMS((char const *, char const *, char const *, char const *, int));
+     int depth;
+{
+  struct dirdata dirdata[2];
+  int val = 0;			/* Return value.  */
+  int i;
+
+  /* Get sorted contents of both dirs.  Both are attempted even if the
+     first fails, so both dirdata entries are always initialized.  */
+  for (i = 0; i < 2; i++)
+    if (dir_sort (&filevec[i], &dirdata[i]) != 0)
+      {
+	perror_with_name (filevec[i].name);
+	val = 2;
+      }
+
+  if (val == 0)
+    {
+      register char const * const *names0 = dirdata[0].names;
+      register char const * const *names1 = dirdata[1].names;
+      char const *name0 = filevec[0].name;
+      char const *name1 = filevec[1].name;
+
+      /* If `-S name' was given, and this is the topmost level of comparison,
+	 ignore all file names less than the specified starting name.  */
+
+      if (dir_start_file && depth == 0)
+	{
+	  while (*names0 && filename_cmp (*names0, dir_start_file) < 0)
+	    names0++;
+	  while (*names1 && filename_cmp (*names1, dir_start_file) < 0)
+	    names1++;
+	}
+
+      /* Loop while files remain in one or both dirs.  */
+      while (*names0 || *names1)
+	{
+	  /* Compare next name in dir 0 with next name in dir 1.
+	     At the end of a dir,
+	     pretend the "next name" in that dir is very large.  */
+	  int nameorder = (!*names0 ? 1 : !*names1 ? -1
+			   : filename_cmp (*names0, *names1));
+	  /* Pass (and step past) the name from each dir that takes part
+	     in this step: only dir 0's when it sorts first, only dir 1's
+	     when that sorts first, both when the names are equal.  */
+	  int v1 = (*handle_file) (name0, 0 < nameorder ? 0 : *names0++,
+				   name1, nameorder < 0 ? 0 : *names1++,
+				   depth + 1);
+	  if (v1 > val)
+	    val = v1;
+	}
+    }
+
+  /* Release whatever dir_sort allocated, including after a failure.  */
+  for (i = 0; i < 2; i++)
+    {
+      if (dirdata[i].names)
+	free (dirdata[i].names);
+      if (dirdata[i].data)
+	free (dirdata[i].data);
+    }
+
+  return val;
+}

+ 200 - 0
sys/src/ape/cmd/diff/ed.c

@@ -0,0 +1,200 @@
+/* Output routines for ed-script format.
+   Copyright (C) 1988, 89, 91, 92, 93 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU DIFF; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#include "diff.h"
+
+static void print_ed_hunk PARAMS((struct change *));
+static void print_rcs_hunk PARAMS((struct change *));
+static void pr_forward_ed_hunk PARAMS((struct change *));
+
+/* Print our script as ed commands.  */
+
+void
+print_ed_script (script)
+    struct change *script;
+{
+  /* Delegate to print_script, handing it find_reverse_change and
+     print_ed_hunk as its two function arguments.  */
+  print_script (script, find_reverse_change, print_ed_hunk);
+}
+
+/* Print a hunk of an ed diff */
+
+static void
+print_ed_hunk (hunk)
+     struct change *hunk;
+{
+  int f0, l0, f1, l1;
+  int deletes, inserts;
+
+#if 0
+  hunk = flip_script (hunk);
+#endif
+#ifdef DEBUG
+  debug_script (hunk);
+#endif
+
+  /* Work out which lines of each file this hunk touches; a hunk that
+     neither deletes nor inserts produces no output.  */
+  analyze_hunk (hunk, &f0, &l0, &f1, &l1, &deletes, &inserts);
+  if (! (deletes || inserts))
+    return;
+
+  begin_output ();
+
+  /* Header: line range in the first file followed by the command letter.  */
+  print_number_range (',', &files[0], f0, l0);
+  fprintf (outfile, "%c\n", change_letter (inserts, deletes));
+
+  /* Only hunks that insert text have a body.  */
+  if (inserts)
+    {
+      int in_text = 1;		/* Nonzero while the text block is open.  */
+      int line;
+
+      for (line = f1; line <= l1; line++)
+	{
+	  /* A previous lone-dot line closed the text block; reopen it
+	     with an `a' command.  */
+	  if (in_text == 0)
+	    fprintf (outfile, "%da\n",
+		     line - f1 + translate_line_number (&files[0], f0) - 1);
+	  in_text = 1;
+
+	  if (files[1].linbuf[line][0] != '.'
+	      || files[1].linbuf[line][1] != '\n')
+	    {
+	      /* Ordinary line: copy it through untouched.  */
+	      print_1_line ("", &files[1].linbuf[line]);
+	      continue;
+	    }
+
+	  /* A line holding only `.' would confuse `ed'.  Emit `..'
+	     instead, close the text block, and then issue a substitute
+	     command that turns the doubled dot back into a single one.  */
+	  fprintf (outfile, "..\n");
+	  fprintf (outfile, ".\n");
+	  fprintf (outfile, "%ds/^\\.\\././\n",
+		   line - f1 + translate_line_number (&files[0], f0));
+	  in_text = 0;
+	}
+
+      /* Close the text block unless the last line already did.  */
+      if (in_text)
+	fprintf (outfile, ".\n");
+    }
+}
+
+/* Print change script in the style of ed commands,
+   but print the changes in the order they appear in the input files,
+   which means that the commands are not truly useful with ed.  */
+
+void
+pr_forward_ed_script (script)
+     struct change *script;
+{
+  /* Delegate to print_script, handing it find_change and
+     pr_forward_ed_hunk as its two function arguments.  */
+  print_script (script, find_change, pr_forward_ed_hunk);
+}
+
+static void
+pr_forward_ed_hunk (hunk)
+     struct change *hunk;
+{
+  int f0, l0, f1, l1;
+  int deletes, inserts;
+  int line;
+
+  /* Find the affected line ranges; a hunk that changes nothing is
+     skipped.  */
+  analyze_hunk (hunk, &f0, &l0, &f1, &l1, &deletes, &inserts);
+  if (! (deletes || inserts))
+    return;
+
+  begin_output ();
+
+  /* Command letter first, then the range in the first file.  */
+  fprintf (outfile, "%c", change_letter (inserts, deletes));
+  print_number_range (' ', &files[0], f0, l0);
+  fprintf (outfile, "\n");
+
+  /* A hunk that inserts (with or without deleting) is followed by the
+     new lines from the second file and a terminating dot; a pure
+     deletion is complete once the header is out.  */
+  if (inserts)
+    {
+      line = f1;
+      while (line <= l1)
+	{
+	  print_1_line ("", &files[1].linbuf[line]);
+	  line++;
+	}
+
+      fprintf (outfile, ".\n");
+    }
+}
+
+/* Print in a format somewhat like ed commands
+   except that each insert command states the number of lines it inserts.
+   This format is used for RCS.  */
+
+void
+print_rcs_script (script)
+     struct change *script;
+{
+  /* Delegate to print_script, handing it find_change and
+     print_rcs_hunk as its two function arguments.  */
+  print_script (script, find_change, print_rcs_hunk);
+}
+
+/* Print a hunk of an RCS diff */
+
+static void
+print_rcs_hunk (hunk)
+     struct change *hunk;
+{
+  int f0, l0, f1, l1;
+  int deletes, inserts;
+  int tf0, tl0, tf1, tl1;
+
+  /* Find the affected line ranges; a hunk that changes nothing is
+     skipped.  */
+  analyze_hunk (hunk, &f0, &l0, &f1, &l1, &deletes, &inserts);
+  if (! (deletes || inserts))
+    return;
+
+  begin_output ();
+
+  translate_range (&files[0], f0, l0, &tf0, &tl0);
+
+  if (deletes)
+    {
+      /* `d' command: first line of the range in file 0 and the number
+	 of lines removed (never reported as less than one).  */
+      int ndel = 1;
+
+      if (tl0 >= tf0)
+	ndel = tl0 - tf0 + 1;
+      fprintf (outfile, "d");
+      fprintf (outfile, "%d %d\n", tf0, ndel);
+    }
+
+  if (inserts)
+    {
+      int nadd = 1;
+      int line;
+
+      fprintf (outfile, "a");
+
+      /* `a' command: last line of the range in file 0 and the number
+	 of lines taken from file 1 (never reported as less than one).  */
+      translate_range (&files[1], f1, l1, &tf1, &tl1);
+      if (tl1 >= tf1)
+	nadd = tl1 - tf1 + 1;
+      fprintf (outfile, "%d %d\n", tl0, nadd);
+
+      /* The inserted lines themselves follow the command.  */
+      for (line = f1; line <= l1; line++)
+	print_1_line ("", &files[1].linbuf[line]);
+    }
+}

+ 181 - 0
sys/src/ape/cmd/diff/fnmatch.c

@@ -0,0 +1,181 @@
+/* Copyright (C) 1992 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.  */
+
+/* Modified slightly by Brian Berliner <berliner@sun.com> and
+   Jim Blandy <jimb@cyclic.com> for CVS use */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "system.h"
+
+/* IGNORE(@ */
+/* #include <ansidecl.h> */
+/* @) */
+#include <errno.h>
+#include "fnmatch.h"
+
+#if !defined(__GNU_LIBRARY__) && !defined(STDC_HEADERS)
+extern int errno;
+#endif
+
+/* Match STRING against the filename pattern PATTERN.
+
+   PATTERN may contain `?' (exactly one character), `*' (any run of
+   characters, possibly empty), `[...]' sets (inverted by a leading `!'
+   or `^', with `a-z' style ranges) and, unless FNM_NOESCAPE is set in
+   FLAGS, backslash escapes.  With FNM_PATHNAME, `?' refuses to match
+   `/' and a set containing `/' never matches; note that the scan done
+   for `*' does not itself stop at `/'.  With FNM_PERIOD, a `.' at the
+   start of STRING (or, together with FNM_PATHNAME, right after a `/')
+   is not matched by `?', `*' or a set.
+
+   Returns zero if STRING matches, FNM_NOMATCH if it does not, and -1
+   with errno set to EINVAL if FLAGS has bits outside __FNM_FLAGS.
+   A pattern that ends in an unfinished escape or an unterminated set
+   does not match.  */
+int
+#if __STDC__
+fnmatch (const char *pattern, const char *string, int flags)
+#else
+fnmatch (pattern, string, flags)
+    char *pattern;
+    char *string;
+    int flags;
+#endif
+{
+  register const char *p = pattern, *n = string;
+  register char c;
+
+  if ((flags & ~__FNM_FLAGS) != 0)
+    {
+      errno = EINVAL;
+      return -1;
+    }
+
+  while ((c = *p++) != '\0')
+    {
+      switch (c)
+	{
+	case '?':
+	  if (*n == '\0')
+	    return FNM_NOMATCH;
+	  else if ((flags & FNM_PATHNAME) && *n == '/')
+	    return FNM_NOMATCH;
+	  else if ((flags & FNM_PERIOD) && *n == '.' &&
+		   (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
+	    return FNM_NOMATCH;
+	  break;
+	  
+	case '\\':
+	  if (!(flags & FNM_NOESCAPE))
+	    {
+	      c = *p++;
+	      if (c == '\0')
+		/* Trailing \ loses.  Stopping here also keeps P from
+		   running past the end of the pattern.  */
+		return FNM_NOMATCH;
+	    }
+	  if (FOLD_FN_CHAR (*n) != FOLD_FN_CHAR (c))
+	    return FNM_NOMATCH;
+	  break;
+	  
+	case '*':
+	  if ((flags & FNM_PERIOD) && *n == '.' &&
+	      (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
+	    return FNM_NOMATCH;
+	  
+	  /* Absorb any further wildcards.  Each `?' must consume exactly
+	     one character of STRING; an extra `*' adds nothing and must
+	     not consume anything.  */
+	  for (c = *p++; c == '?' || c == '*'; c = *p++)
+	    if (c == '?')
+	      {
+		if (*n == '\0' ||
+		    ((flags & FNM_PATHNAME) && *n == '/'))
+		  return FNM_NOMATCH;
+		++n;
+	      }
+	  
+	  if (c == '\0')
+	    /* The wildcards end the pattern: whatever is left matches.  */
+	    return 0;
+	  
+	  {
+	    /* C1 is the literal character the rest of the pattern starts
+	       with; it is used to skip positions that cannot match before
+	       paying for a recursive call.  A set (`[') gives no such
+	       hint, so every position is tried.  */
+	    char c1 = (!(flags & FNM_NOESCAPE) && c == '\\') ? *p : c;
+	    for (--p; *n != '\0'; ++n)
+	      if ((c == '[' || FOLD_FN_CHAR (*n) == FOLD_FN_CHAR (c1)) &&
+		  fnmatch(p, n, flags & ~FNM_PERIOD) == 0)
+		return 0;
+	    return FNM_NOMATCH;
+	  }
+	  
+	case '[':
+	  {
+	    /* Nonzero if the sense of the character class is inverted.  */
+	    register int not;
+	    
+	    if (*n == '\0')
+	      return FNM_NOMATCH;
+	    
+	    if ((flags & FNM_PERIOD) && *n == '.' &&
+		(n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
+	      return FNM_NOMATCH;
+	    
+	    not = (*p == '!' || *p == '^');
+	    if (not)
+	      ++p;
+	    
+	    c = *p++;
+	    for (;;)
+	      {
+		register char cstart = c, cend = c;
+		
+		if (!(flags & FNM_NOESCAPE) && c == '\\')
+		  {
+		    if (*p == '\0')
+		      /* [\ (unterminated) loses; do not step past the
+			 end of the pattern.  */
+		      return FNM_NOMATCH;
+		    cstart = cend = *p++;
+		  }
+		
+		if (c == '\0')
+		  /* [ (unterminated) loses.  */
+		  return FNM_NOMATCH;
+		
+		c = *p++;
+		
+		if ((flags & FNM_PATHNAME) && c == '/')
+		  /* [/] can never match.  */
+		  return FNM_NOMATCH;
+		
+		if (c == '-' && *p != ']')
+		  {
+		    cend = *p++;
+		    if (!(flags & FNM_NOESCAPE) && cend == '\\')
+		      cend = *p++;
+		    if (cend == '\0')
+		      return FNM_NOMATCH;
+		    c = *p++;
+		  }
+		
+		if (*n >= cstart && *n <= cend)
+		  goto matched;
+		
+		if (c == ']')
+		  break;
+	      }
+	    if (!not)
+	      return FNM_NOMATCH;
+	    break;
+	    
+	  matched:;
+	    /* Skip the rest of the [...] that already matched.  */
+	    while (c != ']')
+	      {
+		if (c == '\0')
+		  /* [... (unterminated) loses.  */
+		  return FNM_NOMATCH;
+		
+		c = *p++;
+		if (!(flags & FNM_NOESCAPE) && c == '\\')
+		  {
+		    /* 1003.2d11 is unclear if this is right.  %%% */
+		    if (*p == '\0')
+		      /* Escape with nothing after it: unterminated.  */
+		      return FNM_NOMATCH;
+		    ++p;
+		  }
+	      }
+	    if (not)
+	      return FNM_NOMATCH;
+	  }
+	  break;
+	  
+	default:
+	  if (FOLD_FN_CHAR (c) != FOLD_FN_CHAR (*n))
+	    return FNM_NOMATCH;
+	}
+      
+      ++n;
+    }
+
+  if (*n == '\0')
+    return 0;
+
+  return FNM_NOMATCH;
+}

+ 40 - 0
sys/src/ape/cmd/diff/fnmatch.h

@@ -0,0 +1,40 @@
+/* Copyright (C) 1992 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Library General Public License for more details.  */
+
+#ifndef	_FNMATCH_H
+
+#define	_FNMATCH_H	1
+
+/* Bits set in the FLAGS argument to `fnmatch'.
+   Each macro is #undef'd just before it is defined, so an earlier
+   definition of the same name is replaced by the value given here.  */
+#undef FNM_PATHNAME
+#define	FNM_PATHNAME	(1 << 0)/* No wildcard can ever match `/'.  */
+#undef FNM_NOESCAPE
+#define	FNM_NOESCAPE	(1 << 1)/* Backslashes don't quote special chars.  */
+#undef FNM_PERIOD
+#define	FNM_PERIOD	(1 << 2)/* Leading `.' is matched only explicitly.  */
+/* Mask of all the flag bits defined above.  */
+#undef __FNM_FLAGS
+#define	__FNM_FLAGS	(FNM_PATHNAME|FNM_NOESCAPE|FNM_PERIOD)
+
+/* Value returned by `fnmatch' if STRING does not match PATTERN.  */
+#undef FNM_NOMATCH
+#define	FNM_NOMATCH	1
+
+/* Match STRING against the filename pattern PATTERN,
+   returning zero if it matches, FNM_NOMATCH if not.
+   The prototyped form is used when __STDC__ is nonzero; otherwise an
+   old-style declaration without parameter types is given.  */
+#if __STDC__
+extern int fnmatch (const char *pattern, const char *string, int flags);
+#else
+extern int fnmatch ();
+#endif
+
+#endif	/* fnmatch.h */

+ 748 - 0
sys/src/ape/cmd/diff/getopt.c

@@ -0,0 +1,748 @@
+/* Getopt for GNU.
+   NOTE: getopt is now part of the C library, so if you don't know what
+   "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
+   before changing it!
+
+   Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94
+   	Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 2, or (at your option) any
+   later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+   Ditto for AIX 3.2 and <stdlib.h>.  */
+#ifndef _NO_PROTO
+#define _NO_PROTO
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifndef __STDC__
+/* This is a separate conditional since some stdc systems
+   reject `defined (const)'.  */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+   actually compiling the library itself.  This code is part of the GNU C
+   Library, but also included in many other GNU distributions.  Compiling
+   and linking in this code is a waste when using the GNU C library
+   (especially if it is a shared library).  Rather than having every GNU
+   program understand `configure --with-gnu-libc' and omit the object files,
+   it is simpler to just do this in the source for each such file.  */
+
+#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
+
+
+/* This needs to come after some library #include
+   to get __GNU_LIBRARY__ defined.  */
+#ifdef	__GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+   contain conflicting prototypes for getopt.  */
+#include <stdlib.h>
+#endif	/* GNU C library.  */
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+   but it behaves differently for the user, since it allows the user
+   to intersperse the options with the other arguments.
+
+   As `getopt' works, it permutes the elements of ARGV so that,
+   when it is done, all the options precede everything else.  Thus
+   all application programs are extended to handle flexible argument order.
+
+   Setting the environment variable POSIXLY_CORRECT disables permutation.
+   Then the behavior is completely standard.
+
+   GNU application programs can use a third alternative mode in which
+   they can distinguish the relative order of options and other arguments.  */
+
+#include "getopt.h"
+
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+char *optarg = NULL;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns EOF, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+/* XXX 1003.2 says this must be 1 before any call.  */
+int optind = 0;
+
+/* The next char to be scanned in the option-element
+   in which the last option character we returned was found.
+   This allows us to pick up the scan where we left off.
+
+   If this is zero, or a null string, it means resume the scan
+   by advancing to the next ARGV-element.  */
+
+static char *nextchar;
+
+/* Callers store zero here to inhibit the error message
+   for unrecognized options.  */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+   This must be initialized on some systems to avoid linking in the
+   system's own getopt implementation.  */
+
+int optopt = '?';
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+   If the caller did not specify anything,
+   the default is REQUIRE_ORDER if the environment variable
+   POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+   REQUIRE_ORDER means don't recognize them as options;
+   stop option processing when the first non-option is seen.
+   This is what Unix does.
+   This mode of operation is selected by either setting the environment
+   variable POSIXLY_CORRECT, or using `+' as the first character
+   of the list of option characters.
+
+   PERMUTE is the default.  We permute the contents of ARGV as we scan,
+   so that eventually all the non-options are at the end.  This allows options
+   to be given in any order, even with programs that were not written to
+   expect this.
+
+   RETURN_IN_ORDER is an option available to programs that were written
+   to expect options and other ARGV-elements in any order and that care about
+   the ordering of the two.  We describe each non-option ARGV-element
+   as if it were the argument of an option with character code 1.
+   Using `-' as the first character of the list of option characters
+   selects this mode of operation.
+
+   The special argument `--' forces an end of option-scanning regardless
+   of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
+   `--' can cause `getopt' to return EOF with `optind' != ARGC.  */
+
+static enum
+{
+  REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;
+
+/* Value of POSIXLY_CORRECT environment variable.  */
+static char *posixly_correct;
+
+#ifdef	__GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+   because there are many ways it can cause trouble.
+   On some systems, it contains special magic macros that don't work
+   in GCC.  */
+#include <string.h>
+#define	my_index	strchr
+#else
+
+/* Avoid depending on library functions or files
+   whose names are inconsistent.  */
+
+char *getenv ();
+
+static char *
+my_index (str, chr)
+     const char *str;
+     int chr;
+{
+  const char *scan;
+
+  /* Look at every character up to, but not including, the
+     terminating null, and return the first one equal to CHR.  */
+  for (scan = str; *scan != '\0'; scan++)
+    if (*scan == chr)
+      return (char *) scan;
+
+  /* CHR does not occur in STR.  */
+  return 0;
+}
+
+/* If using GCC, we can safely declare strlen this way.
+   If not using GCC, it is ok not to declare it.  */
+#ifdef __GNUC__
+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
+   That was relevant to code that was here before.  */
+#ifndef __STDC__
+/* gcc with -traditional declares the built-in strlen to return int,
+   and has done so at least since version 2.4.5. -- rms.  */
+extern int strlen (const char *);
+#endif /* not __STDC__ */
+#endif /* __GNUC__ */
+
+#endif /* not __GNU_LIBRARY__ */
+
+/* Handle permutation of arguments.  */
+
+/* Describe the part of ARGV that contains non-options that have
+   been skipped.  `first_nonopt' is the index in ARGV of the first of them;
+   `last_nonopt' is the index after the last of them.  */
+
+static int first_nonopt;
+static int last_nonopt;
+
+/* Exchange two adjacent subsequences of ARGV.
+   One subsequence is elements [first_nonopt,last_nonopt)
+   which contains all the non-options that have been skipped so far.
+   The other is elements [last_nonopt,optind), which contains all
+   the options processed since those non-options were skipped.
+
+   `first_nonopt' and `last_nonopt' are relocated so that they describe
+   the new indices of the non-options in ARGV after they are moved.  */
+
+static void
+exchange (argv)
+     char **argv;
+{
+  int lo = first_nonopt;	/* Start of the skipped non-options.  */
+  int mid = last_nonopt;	/* Start of the options that follow them.  */
+  int hi = optind;		/* One past the last of those options.  */
+
+  /* Repeatedly swap the shorter of the two segments into its final
+     place.  What is left of the longer segment is then two pieces in
+     the wrong order, which the next pass deals with.  */
+  while (hi > mid && mid > lo)
+    {
+      int lo_len = mid - lo;
+      int hi_len = hi - mid;
+      int lo_is_shorter = hi_len > lo_len;
+      int count = lo_is_shorter ? lo_len : hi_len;
+      /* The start of the low segment always takes part in the swap.
+	 Its partner is the tail of the high segment when the low one
+	 is shorter, and the whole high segment otherwise.  */
+      char **from = argv + lo;
+      char **to = argv + (lo_is_shorter ? hi - lo_len : mid);
+      register int k;
+
+      for (k = 0; k < count; k++)
+	{
+	  char *held = from[k];
+	  from[k] = to[k];
+	  to[k] = held;
+	}
+
+      /* Exclude the part that has reached its final position.  */
+      if (lo_is_shorter)
+	hi -= count;
+      else
+	lo += count;
+    }
+
+  /* Record the slots the non-options now occupy.  */
+  first_nonopt += (optind - last_nonopt);
+  last_nonopt = optind;
+}
+
+/* Initialize the internal data when the first call is made.  */
+
+static const char *
+_getopt_initialize (optstring)
+     const char *optstring;
+{
+  /* Scanning starts at ARGV-element 1, element 0 being the program
+     name, and no non-options have been skipped yet.  */
+  first_nonopt = last_nonopt = optind = 1;
+
+  nextchar = NULL;
+
+  posixly_correct = getenv ("POSIXLY_CORRECT");
+
+  /* Pick the ordering mode.  A leading `-' or `+' in OPTSTRING selects
+     it explicitly and is stepped over; otherwise the environment
+     variable decides.  */
+  switch (optstring[0])
+    {
+    case '-':
+      ordering = RETURN_IN_ORDER;
+      ++optstring;
+      break;
+
+    case '+':
+      ordering = REQUIRE_ORDER;
+      ++optstring;
+      break;
+
+    default:
+      if (posixly_correct != NULL)
+	ordering = REQUIRE_ORDER;
+      else
+	ordering = PERMUTE;
+      break;
+    }
+
+  /* The caller continues with the option letters proper.  */
+  return optstring;
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+   given in OPTSTRING.
+
+   If an element of ARGV starts with '-', and is not exactly "-" or "--",
+   then it is an option element.  The characters of this element
+   (aside from the initial '-') are option characters.  If `getopt'
+   is called repeatedly, it returns successively each of the option characters
+   from each of the option elements.
+
+   If `getopt' finds another option character, it returns that character,
+   updating `optind' and `nextchar' so that the next call to `getopt' can
+   resume the scan with the following option character or ARGV-element.
+
+   If there are no more option characters, `getopt' returns `EOF'.
+   Then `optind' is the index in ARGV of the first ARGV-element
+   that is not an option.  (The ARGV-elements have been permuted
+   so that those that are not options now come last.)
+
+   OPTSTRING is a string containing the legitimate option characters.
+   If an option character is seen that is not listed in OPTSTRING,
+   return '?' after printing an error message.  If you set `opterr' to
+   zero, the error message is suppressed but we still return '?'.
+
+   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+   so the following text in the same ARGV-element, or the text of the following
+   ARGV-element, is returned in `optarg'.  Two colons mean an option that
+   wants an optional arg; if there is text in the current ARGV-element,
+   it is returned in `optarg', otherwise `optarg' is set to zero.
+
+   If OPTSTRING starts with `-' or `+', it requests different methods of
+   handling the non-option ARGV-elements.
+   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+   Long-named options begin with `--' instead of `-'.
+   Their names may be abbreviated as long as the abbreviation is unique
+   or is an exact match for some defined option.  If they have an
+   argument, it follows the option name in the same ARGV-element, separated
+   from the option name by a `=', or else in the next ARGV-element.
+   When `getopt' finds a long-named option, it returns 0 if that option's
+   `flag' field is nonzero, the value of the option's `val' field
+   if the `flag' field is zero.
+
+   The elements of ARGV aren't really const, because we permute them.
+   But we pretend they're const in the prototype to be compatible
+   with other systems.
+
+   LONGOPTS is a vector of `struct option' terminated by an
+   element containing a name which is zero.
+
+   LONGIND returns the index in LONGOPTS of the long-named option found.
+   It is only valid when a long-named option has been found by the most
+   recent call.
+
+   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+   long-named options.  */
+
+/* Worker behind `getopt': scan ARGV for the next option.
+
+   Returns the option character found; EOF when scanning is finished;
+   1 (with `optarg' set) for a non-option ARGV-element that was neither
+   permuted away nor treated as the end of the options; '?' for an
+   unrecognized or ambiguous option or an argument given to a long
+   option that takes none; and, for a missing required argument, ':'
+   if OPTSTRING begins with ':' and '?' otherwise.  For a long option
+   it returns 0 after storing `val' through a nonzero `flag', or `val'
+   itself when `flag' is zero.
+
+   A leading `-' or `+' in OPTSTRING only selects the ordering mode.
+   It is stepped over on every call, so it is never taken for an
+   option letter and does not hide a following ':'.  */
+int
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+     const struct option *longopts;
+     int *longind;
+     int long_only;
+{
+  optarg = NULL;
+
+  if (optind == 0)
+    optstring = _getopt_initialize (optstring);
+  else if (optstring[0] == '-' || optstring[0] == '+')
+    /* The initializer strips the mode character only on the first
+       call; the caller passes the same string again each time, so
+       strip it here on the later calls.  */
+    optstring++;
+
+  if (nextchar == NULL || *nextchar == '\0')
+    {
+      /* Advance to the next ARGV-element.  */
+
+      if (ordering == PERMUTE)
+	{
+	  /* If we have just processed some options following some non-options,
+	     exchange them so that the options come first.  */
+
+	  if (first_nonopt != last_nonopt && last_nonopt != optind)
+	    exchange ((char **) argv);
+	  else if (last_nonopt != optind)
+	    first_nonopt = optind;
+
+	  /* Skip any additional non-options
+	     and extend the range of non-options previously skipped.  */
+
+	  while (optind < argc
+		 && (argv[optind][0] != '-' || argv[optind][1] == '\0'))
+	    optind++;
+	  last_nonopt = optind;
+	}
+
+      /* The special ARGV-element `--' means premature end of options.
+	 Skip it like a null option,
+	 then exchange with previous non-options as if it were an option,
+	 then skip everything else like a non-option.  */
+
+      if (optind != argc && !strcmp (argv[optind], "--"))
+	{
+	  optind++;
+
+	  if (first_nonopt != last_nonopt && last_nonopt != optind)
+	    exchange ((char **) argv);
+	  else if (first_nonopt == last_nonopt)
+	    first_nonopt = optind;
+	  last_nonopt = argc;
+
+	  optind = argc;
+	}
+
+      /* If we have done all the ARGV-elements, stop the scan
+	 and back over any non-options that we skipped and permuted.  */
+
+      if (optind == argc)
+	{
+	  /* Set the next-arg-index to point at the non-options
+	     that we previously skipped, so the caller will digest them.  */
+	  if (first_nonopt != last_nonopt)
+	    optind = first_nonopt;
+	  return EOF;
+	}
+
+      /* If we have come to a non-option and did not permute it,
+	 either stop the scan or describe it to the caller and pass it by.  */
+
+      if ((argv[optind][0] != '-' || argv[optind][1] == '\0'))
+	{
+	  if (ordering == REQUIRE_ORDER)
+	    return EOF;
+	  optarg = argv[optind++];
+	  return 1;
+	}
+
+      /* We have found another option-ARGV-element.
+	 Skip the initial punctuation.  */
+
+      nextchar = (argv[optind] + 1
+		  + (longopts != NULL && argv[optind][1] == '-'));
+    }
+
+  /* Decode the current option-ARGV-element.  */
+
+  /* Check whether the ARGV-element is a long option.
+
+     If long_only and the ARGV-element has the form "-f", where f is
+     a valid short option, don't consider it an abbreviated form of
+     a long option that starts with f.  Otherwise there would be no
+     way to give the -f short option.
+
+     On the other hand, if there's a long option "fubar" and
+     the ARGV-element is "-fu", do consider that an abbreviation of
+     the long option, just like "--fu", and not "-f" with arg "u".
+
+     This distinction seems to be the most useful approach.  */
+
+  if (longopts != NULL
+      && (argv[optind][1] == '-'
+	  || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
+    {
+      char *nameend;
+      const struct option *p;
+      const struct option *pfound = NULL;
+      int exact = 0;
+      int ambig = 0;
+      int indfound;
+      int option_index;
+
+      /* The option name ends at `=' or at the end of the element.  */
+      for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+	/* Do nothing.  */ ;
+
+      /* Test all long options for either exact match
+	 or abbreviated matches.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++)
+	if (!strncmp (p->name, nextchar, nameend - nextchar))
+	  {
+	    if (nameend - nextchar == strlen (p->name))
+	      {
+		/* Exact match found.  */
+		pfound = p;
+		indfound = option_index;
+		exact = 1;
+		break;
+	      }
+	    else if (pfound == NULL)
+	      {
+		/* First nonexact match found.  */
+		pfound = p;
+		indfound = option_index;
+	      }
+	    else
+	      /* Second or later nonexact match found.  */
+	      ambig = 1;
+	  }
+
+      if (ambig && !exact)
+	{
+	  if (opterr)
+	    fprintf (stderr, "%s: option `%s' is ambiguous\n",
+		     argv[0], argv[optind]);
+	  nextchar += strlen (nextchar);
+	  optind++;
+	  return '?';
+	}
+
+      if (pfound != NULL)
+	{
+	  option_index = indfound;
+	  optind++;
+	  if (*nameend)
+	    {
+	      /* Don't test has_arg with >, because some C compilers don't
+		 allow it to be used on enums.  */
+	      if (pfound->has_arg)
+		optarg = nameend + 1;
+	      else
+		{
+		  if (opterr)
+		    {
+		      if (argv[optind - 1][1] == '-')
+			/* --option */
+			fprintf (stderr,
+				 "%s: option `--%s' doesn't allow an argument\n",
+				 argv[0], pfound->name);
+		      else
+			/* +option or -option */
+			fprintf (stderr,
+			     "%s: option `%c%s' doesn't allow an argument\n",
+			     argv[0], argv[optind - 1][0], pfound->name);
+		    }
+		  nextchar += strlen (nextchar);
+		  return '?';
+		}
+	    }
+	  else if (pfound->has_arg == 1)
+	    {
+	      if (optind < argc)
+		optarg = argv[optind++];
+	      else
+		{
+		  if (opterr)
+		    fprintf (stderr, "%s: option `%s' requires an argument\n",
+			     argv[0], argv[optind - 1]);
+		  nextchar += strlen (nextchar);
+		  return optstring[0] == ':' ? ':' : '?';
+		}
+	    }
+	  nextchar += strlen (nextchar);
+	  if (longind != NULL)
+	    *longind = option_index;
+	  if (pfound->flag)
+	    {
+	      *(pfound->flag) = pfound->val;
+	      return 0;
+	    }
+	  return pfound->val;
+	}
+
+      /* Can't find it as a long option.  If this is not getopt_long_only,
+	 or the option starts with '--' or is not a valid short
+	 option, then it's an error.
+	 Otherwise interpret it as a short option.  */
+      if (!long_only || argv[optind][1] == '-'
+	  || my_index (optstring, *nextchar) == NULL)
+	{
+	  if (opterr)
+	    {
+	      if (argv[optind][1] == '-')
+		/* --option */
+		fprintf (stderr, "%s: unrecognized option `--%s'\n",
+			 argv[0], nextchar);
+	      else
+		/* +option or -option */
+		fprintf (stderr, "%s: unrecognized option `%c%s'\n",
+			 argv[0], argv[optind][0], nextchar);
+	    }
+	  nextchar = (char *) "";
+	  optind++;
+	  return '?';
+	}
+    }
+
+  /* Look at and handle the next short option-character.  */
+
+  {
+    char c = *nextchar++;
+    char *temp = my_index (optstring, c);
+
+    /* Increment `optind' when we start to process its last character.  */
+    if (*nextchar == '\0')
+      ++optind;
+
+    /* ':' is the argument marker in OPTSTRING, never an option.  */
+    if (temp == NULL || c == ':')
+      {
+	if (opterr)
+	  {
+	    if (posixly_correct)
+	      /* 1003.2 specifies the format of this message.  */
+	      fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c);
+	    else
+	      fprintf (stderr, "%s: invalid option -- %c\n", argv[0], c);
+	  }
+	optopt = c;
+	return '?';
+      }
+    if (temp[1] == ':')
+      {
+	if (temp[2] == ':')
+	  {
+	    /* This is an option that accepts an argument optionally.  */
+	    if (*nextchar != '\0')
+	      {
+		optarg = nextchar;
+		optind++;
+	      }
+	    else
+	      optarg = NULL;
+	    nextchar = NULL;
+	  }
+	else
+	  {
+	    /* This is an option that requires an argument.  */
+	    if (*nextchar != '\0')
+	      {
+		optarg = nextchar;
+		/* If we end this ARGV-element by taking the rest as an arg,
+		   we must advance to the next element now.  */
+		optind++;
+	      }
+	    else if (optind == argc)
+	      {
+		if (opterr)
+		  {
+		    /* 1003.2 specifies the format of this message.  */
+		    fprintf (stderr, "%s: option requires an argument -- %c\n",
+			     argv[0], c);
+		  }
+		optopt = c;
+		if (optstring[0] == ':')
+		  c = ':';
+		else
+		  c = '?';
+	      }
+	    else
+	      /* We already incremented `optind' once;
+		 increment it again when taking next ARGV-elt as argument.  */
+	      optarg = argv[optind++];
+	    nextchar = NULL;
+	  }
+      }
+    return c;
+  }
+}
+
+int
+getopt (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  /* Short options only: call the worker with no long-option table,
+     no place to store a long-option index, and long_only off.  */
+  return _getopt_internal (argc, argv, optstring,
+			   (const struct option *) 0,
+			   (int *) 0,
+			   0);
+}
+
+#endif	/* _LIBC or not __GNU_LIBRARY__.  */
+
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+   the above definition of `getopt'.  */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int digit_optind = 0;
+  int c;
+
+  for (;;)
+    {
+      /* Note which ARGV-element the coming option belongs to; an
+	 `optind' of zero (nothing scanned yet) counts as element 1.  */
+      int this_option_optind = optind ? optind : 1;
+
+      c = getopt (argc, argv, "abc:d:0123456789");
+      if (c == EOF)
+	break;
+
+      if (c >= '0' && c <= '9')
+	{
+	  /* Digit options: complain when they are spread over more than
+	     one ARGV-element.  */
+	  if (digit_optind != 0 && digit_optind != this_option_optind)
+	    printf ("digits occur in two different argv-elements.\n");
+	  digit_optind = this_option_optind;
+	  printf ("option %c\n", c);
+	}
+      else if (c == 'a')
+	printf ("option a\n");
+      else if (c == 'b')
+	printf ("option b\n");
+      else if (c == 'c')
+	printf ("option c with value `%s'\n", optarg);
+      else if (c != '?')
+	/* Anything else, other than the '?' error return, is reported
+	   by its character code.  */
+	printf ("?? getopt returned character code 0%o ??\n", c);
+    }
+
+  /* List whatever getopt left unscanned.  */
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      for (; optind < argc; optind++)
+	printf ("%s ", argv[optind]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */

+ 129 - 0
sys/src/ape/cmd/diff/getopt.h

@@ -0,0 +1,129 @@
+/* Declarations for getopt.
+   Copyright (C) 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 2, or (at your option) any
+   later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#ifndef _GETOPT_H
+#define _GETOPT_H 1
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns EOF, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+   for unrecognized options.  */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized.  */
+
+extern int optopt;
+
+/* Describe the long-named options requested by the application.
+   The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+   of `struct option' terminated by an element containing a name which is
+   zero.
+
+   The field `has_arg' is:
+   no_argument		(or 0) if the option does not take an argument,
+   required_argument	(or 1) if the option requires an argument,
+   optional_argument 	(or 2) if the option takes an optional argument.
+
+   If the field `flag' is not NULL, it points to a variable that is set
+   to the value given in the field `val' when the option is found, but
+   left unchanged if the option is not found.
+
+   To have a long-named option do something other than set an `int' to
+   a compiled-in constant, such as set a value from `optarg', set the
+   option's `flag' field to zero and its `val' field to a nonzero
+   value (the equivalent single-letter option character, if there is
+   one).  For long options that have a zero `flag' field, `getopt'
+   returns the contents of the `val' field.  */
+
+struct option
+{
+#if	__STDC__
+  const char *name;	/* long option name; a zero name ends the table */
+#else
+  char *name;		/* same field, for compilers without `const' */
+#endif
+  /* has_arg can't be an enum because some compilers complain about
+     type mismatches in all the code that assumes it is an int.  */
+  int has_arg;		/* no_argument, required_argument or optional_argument */
+  int *flag;		/* if not NULL, *flag is set to `val' when the option is found */
+  int val;		/* value stored in *flag, or returned by `getopt' when flag is zero */
+};
+
+/* Names for the values of the `has_arg' field of `struct option'.  */
+
+#define	no_argument		0
+#define required_argument	1
+#define optional_argument	2
+
+#if __STDC__
+#if defined(__GNU_LIBRARY__)
+/* Many other libraries have conflicting prototypes for getopt, with
+   differences in the consts, in stdlib.h.  To avoid compilation
+   errors, only prototype getopt for the GNU C library.  */
+extern int getopt (int argc, char *const *argv, const char *shortopts);
+#else /* not __GNU_LIBRARY__ */
+extern int getopt ();
+#endif /* not __GNU_LIBRARY__ */
+extern int getopt_long (int argc, char *const *argv, const char *shortopts,
+		        const struct option *longopts, int *longind);
+extern int getopt_long_only (int argc, char *const *argv,
+			     const char *shortopts,
+		             const struct option *longopts, int *longind);
+
+/* Internal only.  Users should not call this directly.  */
+extern int _getopt_internal (int argc, char *const *argv,
+			     const char *shortopts,
+		             const struct option *longopts, int *longind,
+			     int long_only);
+#else /* not __STDC__ */
+extern int getopt ();
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+#endif /* not __STDC__ */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _GETOPT_H */

+ 180 - 0
sys/src/ape/cmd/diff/getopt1.c

@@ -0,0 +1,180 @@
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+   Copyright (C) 1987, 88, 89, 90, 91, 92, 1993
+	Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 2, or (at your option) any
+   later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "getopt.h"
+
+#ifndef __STDC__
+/* This is a separate conditional since some stdc systems
+   reject `defined (const)'.  */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+   actually compiling the library itself.  This code is part of the GNU C
+   Library, but also included in many other GNU distributions.  Compiling
+   and linking in this code is a waste when using the GNU C library
+   (especially if it is a shared library).  Rather than having every GNU
+   program understand `configure --with-gnu-libc' and omit the object files,
+   it is simpler to just do this in the source for each such file.  */
+
+#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
+
+
+/* This needs to come after some library #include
+   to get __GNU_LIBRARY__ defined.  */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#else
+char *getenv ();
+#endif
+
+#ifndef	NULL
+#define NULL 0
+#endif
+
+/* Scan ARGV for the short options in OPTIONS and the long options in
+   LONG_OPTIONS, with long-only matching switched off.  */
+int
+getopt_long (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int long_only = 0;
+
+  return _getopt_internal (argc, argv, options, long_options, opt_index,
+			   long_only);
+}
+
+/* Variant of getopt_long in which a single '-' may introduce a long
+   option just as '--' does.  An argument starting with '-' (not '--')
+   that matches no long option but does match a short option is parsed
+   as that short option.  */
+
+int
+getopt_long_only (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int long_only = 1;
+
+  return _getopt_internal (argc, argv, options, long_options, opt_index,
+			   long_only);
+}
+
+
+#endif	/* _LIBC or not __GNU_LIBRARY__.  */
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver: run `getopt_long' over the command line with a small
+   table of long options, report every option it returns, then list
+   whatever non-option arguments remain.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int digit_optind = 0;
+
+  while (1)
+    {
+      /* ARGV index about to be scanned; `optind' may still be zero
+	 before the first call.  */
+      int this_option_optind = optind ? optind : 1;
+      int option_index = 0;
+      /* Every entry has a zero `flag' and a zero `val', so each long
+	 option is reported through the `case 0' branch below.  */
+      static struct option long_options[] =
+      {
+	{"add", 1, 0, 0},
+	{"append", 0, 0, 0},
+	{"delete", 1, 0, 0},
+	{"verbose", 0, 0, 0},
+	{"create", 0, 0, 0},
+	{"file", 1, 0, 0},
+	{0, 0, 0, 0}
+      };
+
+      c = getopt_long (argc, argv, "abc:d:0123456789",
+		       long_options, &option_index);
+      if (c == EOF)
+	break;
+
+      switch (c)
+	{
+	case 0:
+	  printf ("option %s", long_options[option_index].name);
+	  if (optarg)
+	    printf (" with arg %s", optarg);
+	  printf ("\n");
+	  break;
+
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+	  /* All digit options are expected to share one ARGV element.  */
+	  if (digit_optind != 0 && digit_optind != this_option_optind)
+	    printf ("digits occur in two different argv-elements.\n");
+	  digit_optind = this_option_optind;
+	  printf ("option %c\n", c);
+	  break;
+
+	case 'a':
+	  printf ("option a\n");
+	  break;
+
+	case 'b':
+	  printf ("option b\n");
+	  break;
+
+	case 'c':
+	  printf ("option c with value `%s'\n", optarg);
+	  break;
+
+	case 'd':
+	  printf ("option d with value `%s'\n", optarg);
+	  break;
+
+	case '?':
+	  /* Nothing more to report for an unrecognized option.  */
+	  break;
+
+	default:
+	  printf ("?? getopt returned character code 0%o ??\n", c);
+	}
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+	printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  /* <stdlib.h> is only included when __GNU_LIBRARY__ is defined, so
+     return from main rather than call an undeclared `exit'.  */
+  return 0;
+}
+
+#endif /* TEST */

+ 428 - 0
sys/src/ape/cmd/diff/ifdef.c

@@ -0,0 +1,428 @@
+/* #ifdef-format output routines for GNU DIFF.
+   Copyright (C) 1989, 1991, 1992, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY.  No author or distributor
+accepts responsibility to anyone for the consequences of using it
+or for whether it serves any particular purpose or works at all,
+unless he says so in writing.  Refer to the GNU DIFF General Public
+License for full details.
+
+Everyone is granted permission to copy, modify and redistribute
+GNU DIFF, but only under the conditions described in the
+GNU DIFF General Public License.   A copy of this license is
+supposed to have been given to you along with GNU DIFF so you
+can know your rights and responsibilities.  It should be in a
+file named COPYING.  Among other things, the copyright notice
+and this notice must be preserved on all copies.  */
+
+
+#include "diff.h"
+
+struct group
+{
+  struct file_data const *file; /* file whose line buffer the group indexes */
+  int from, upto; /* start and limit lines for this group of lines; UPTO itself is not printed */
+};
+
+static char *format_group PARAMS((FILE *, char *, int, struct group const *));
+static char *scan_char_literal PARAMS((char *, int *));
+static char *scan_printf_spec PARAMS((char *));
+static int groups_letter_value PARAMS((struct group const *, int));
+static void format_ifdef PARAMS((char *, int, int, int, int));
+static void print_ifdef_hunk PARAMS((struct change *));
+static void print_ifdef_lines PARAMS((FILE *, char *, struct group const *));
+
+static int next_line;
+
+/* Emit the edit script SCRIPT as one merged #ifdef-style file: every
+   hunk goes through print_ifdef_hunk, and any lines of the first file
+   left after the last hunk are flushed as an unchanged group.  */
+
+void
+print_ifdef_script (script)
+     struct change *script;
+{
+  /* Start before the first line, at minus the prefix line count.  */
+  next_line = - files[0].prefix_lines;
+  print_script (script, find_change, print_ifdef_hunk);
+
+  /* Nothing left over after the final hunk.  */
+  if (! (next_line < files[0].valid_lines))
+    return;
+
+  begin_output ();
+  format_ifdef (group_format[UNCHANGED],
+		next_line,
+		files[0].valid_lines,
+		next_line - files[0].valid_lines + files[1].valid_lines,
+		files[1].valid_lines);
+}
+
+/* Output one hunk of an ifdef-style diff.  HUNK is a contiguous piece
+   of the edit script covering changes in consecutive lines.  Unchanged
+   lines between the previous hunk and this one are printed first.  */
+
+static void
+print_ifdef_hunk (hunk)
+     struct change *hunk;
+{
+  int first0, last0, first1, last1, deletes, inserts;
+  char *format;
+
+  /* Work out which lines of each file the hunk covers.  */
+  analyze_hunk (hunk, &first0, &last0, &first1, &last1, &deletes, &inserts);
+
+  /* A hunk that neither inserts nor deletes prints nothing.  */
+  if (!inserts && !deletes)
+    return;
+
+  if (!inserts)
+    format = group_format[OLD];
+  else if (deletes)
+    format = group_format[CHANGED];
+  else
+    format = group_format[NEW];
+
+  begin_output ();
+
+  /* Flush the unchanged lines that precede this change.  */
+  if (first0 > next_line)
+    format_ifdef (group_format[UNCHANGED], next_line, first0,
+		  next_line - first0 + first1, first1);
+
+  /* Emit the change itself and remember where the next hunk resumes.  */
+  next_line = last0 + 1;
+  format_ifdef (format, first0, last0 + 1, first1, last1 + 1);
+}
+
+/* Write a pair of line groups to `outfile' under control of FORMAT.
+   The first group is lines BEG0 up to END0 of the first file, the
+   second is lines BEG1 up to END1 of the second file.  */
+
+static void
+format_ifdef (format, beg0, end0, beg1, end1)
+     char *format;
+     int beg0, end0, beg1, end1;
+{
+  struct group pair[2];
+  struct group *old_lines = &pair[0];
+  struct group *new_lines = &pair[1];
+
+  old_lines->file = &files[0];
+  new_lines->file = &files[1];
+
+  old_lines->from = beg0;
+  old_lines->upto = end0;
+  new_lines->from = beg1;
+  new_lines->upto = end1;
+
+  /* The whole of FORMAT applies, so the end character is the NUL.  */
+  format_group (outfile, format, '\0', pair);
+}
+
+/* Print to file OUT a set of lines according to FORMAT.
+   The format ends at the first free instance of ENDCHAR.
+   Yield the address of the terminating character.
+   GROUPS specifies which lines to print.
+   If OUT is zero, do not actually print anything; just scan the format.
+
+   Characters other than `%' are copied to OUT unchanged.  The
+   directives recognized after a `%' are:
+     %%           a literal percent sign;
+     %(A=B?T:E)   A and B are each a decimal number or a group letter;
+                  T is printed when they are equal, otherwise E.  The
+                  part not selected is still scanned, with OUT zero;
+     %<  %=  %>   the lines of GROUPS[0] with line_format[OLD], of
+                  GROUPS[0] with line_format[UNCHANGED], or of
+                  GROUPS[1] with line_format[NEW];
+     otherwise    a printf spec followed by a quoted character literal
+                  or a group letter, whose value is printed with that
+                  spec.
+   A directive that does not parse is not an error: its `%' is copied
+   to OUT and scanning resumes with the character after it.  */
+
+static char *
+format_group (out, format, endchar, groups)
+     register FILE *out;
+     char *format;
+     int endchar;
+     struct group const *groups;
+{
+  register char c;
+  register char *f = format;
+
+  while ((c = *f) != endchar && c != 0)
+    {
+      f++;
+      if (c == '%')
+	{
+	  char *spec = f;	/* first character after the `%' */
+	  switch ((c = *f++))
+	    {
+	    case '%':
+	      break;
+
+	    case '(':
+	      /* Print if-then-else format e.g. `%(n=1?thenpart:elsepart)'.  */
+	      {
+		int i, value[2];
+		FILE *thenout, *elseout;
+
+		for (i = 0; i < 2; i++)
+		  {
+		    unsigned char f0 = f[0];
+		    if (ISDIGIT (f0))
+		      {
+			value[i] = atoi (f);
+			while (ISDIGIT ((unsigned char) *++f))
+			  continue;
+		      }
+		    else
+		      {
+			value[i] = groups_letter_value (groups, f0);
+			if (value[i] < 0)
+			  goto bad_format;
+			f++;
+		      }
+		    if (*f++ != "=?"[i])	/* `=' follows the first operand, `?' the second */
+		      goto bad_format;
+		  }
+		if (value[0] == value[1])
+		  thenout = out, elseout = 0;
+		else
+		  thenout = 0, elseout = out;
+		f = format_group (thenout, f, ':', groups);
+		if (*f)
+		  {
+		    f = format_group (elseout, f + 1, ')', groups);
+		    if (*f)
+		      f++;
+		  }
+	      }
+	      continue;
+
+	    case '<':
+	      /* Print lines deleted from first file.  */
+	      print_ifdef_lines (out, line_format[OLD], &groups[0]);
+	      continue;
+
+	    case '=':
+	      /* Print common lines.  */
+	      print_ifdef_lines (out, line_format[UNCHANGED], &groups[0]);
+	      continue;
+
+	    case '>':
+	      /* Print lines inserted from second file.  */
+	      print_ifdef_lines (out, line_format[NEW], &groups[1]);
+	      continue;
+
+	    default:
+	      {
+		int value;
+		char *speclim;
+
+		f = scan_printf_spec (spec);
+		if (!f)
+		  goto bad_format;
+		speclim = f;	/* the character that names the value to print */
+		c = *f++;
+		switch (c)
+		  {
+		    case '\'':
+		      f = scan_char_literal (f, &value);
+		      if (!f)
+			goto bad_format;
+		      break;
+
+		    default:
+		      value = groups_letter_value (groups, c);
+		      if (value < 0)
+			goto bad_format;
+		      break;
+		  }
+		if (out)
+		  {
+		    /* Temporarily replace e.g. "%3dnx" with "%3d\0x".  */
+		    *speclim = 0;
+		    fprintf (out, spec - 1, value);	/* spec - 1 is the `%' itself */
+		    /* Undo the temporary replacement.  */
+		    *speclim = c;
+		  }
+	      }
+	      continue;
+
+	    bad_format:
+	      c = '%';	/* emit the `%' literally ... */
+	      f = spec;	/* ... and rescan what followed it as plain text */
+	      break;
+	    }
+	}
+      if (out)
+	putc (c, out);
+    }
+  return f;
+}
+
+/* Map the group format letter LETTER to its number for the pair of
+   line groups G.  A lower-case letter refers to the first group of the
+   pair and an upper-case letter to the second.  The result is -1 when
+   LETTER is not a group format letter.  */
+static int
+groups_letter_value (g, letter)
+     struct group const *g;
+     int letter;
+{
+  struct group const *grp = g;
+  int key = letter;
+
+  if (ISUPPER (key))
+    {
+      grp = g + 1;
+      key = tolower (key);
+    }
+
+  /* `n' is the number of lines in the group.  */
+  if (key == 'n')
+    return grp->upto - grp->from;
+
+  /* `f' is the translated first line; `e' is one less than that.  */
+  if (key == 'e' || key == 'f')
+    return translate_line_number (grp->file, grp->from) - (key == 'e');
+
+  /* `m' is the translated limit line; `l' is one less than that.  */
+  if (key == 'l' || key == 'm')
+    return translate_line_number (grp->file, grp->upto) - (key == 'l');
+
+  return -1;
+}
+
+/* Print to file OUT, using FORMAT to print the line group GROUP.
+   But do nothing if OUT is zero.
+
+   FORMAT is applied once for each line FROM up to (not including) UPTO.
+   `%l' prints the line without its trailing newline, `%L' prints the
+   line up to the start of the next one, and `%%' prints a percent
+   sign.  A printf spec followed by `n' prints the result of
+   translate_line_number for the line; followed by a quoted character
+   literal it prints that character's value.  A directive that does not
+   parse is not an error: its `%' is copied to OUT and scanning resumes
+   with the character after it.
+
+   When tab_expand_flag is zero and FORMAT is exactly "%l\n" or "%L",
+   the whole group is written with a single fwrite instead.  */
+static void
+print_ifdef_lines (out, format, group)
+     register FILE *out;
+     char *format;
+     struct group const *group;
+{
+  struct file_data const *file = group->file;
+  char const * const *linbuf = file->linbuf;
+  int from = group->from, upto = group->upto;
+
+  if (!out)
+    return;
+
+  /* If possible, use a single fwrite; it's faster.  */
+  if (!tab_expand_flag && format[0] == '%')
+    {
+      if (format[1] == 'l' && format[2] == '\n' && !format[3])
+	{
+	  /* One extra byte is written when the byte just before
+	     linbuf[upto] is not a newline.  */
+	  fwrite (linbuf[from], sizeof (char),
+		  linbuf[upto] + (linbuf[upto][-1] != '\n') -  linbuf[from],
+		  out);
+	  return;
+	}
+      if (format[1] == 'L' && !format[2])
+	{
+	  fwrite (linbuf[from], sizeof (char),
+		  linbuf[upto] -  linbuf[from], out);
+	  return;
+	}
+    }
+
+  for (;  from < upto;  from++)
+    {
+      register char c;
+      register char *f = format;
+
+      while ((c = *f++) != 0)
+	{
+	  if (c == '%')
+	    {
+	      char *spec = f;	/* first character after the `%' */
+	      switch ((c = *f++))
+		{
+		case '%':
+		  break;
+
+		case 'l':
+		  /* Stop one byte early when the line ends in a newline.  */
+		  output_1_line (linbuf[from],
+				 linbuf[from + 1]
+				   - (linbuf[from + 1][-1] == '\n'), 0, 0);
+		  continue;
+
+		case 'L':
+		  output_1_line (linbuf[from], linbuf[from + 1], 0, 0);
+		  continue;
+
+		default:
+		  {
+		    int value;
+		    char *speclim;
+
+		    f = scan_printf_spec (spec);
+		    if (!f)
+		      goto bad_format;
+		    speclim = f;	/* the character that names the value to print */
+		    c = *f++;
+		    switch (c)
+		      {
+			case '\'':
+			  f = scan_char_literal (f, &value);
+			  if (!f)
+			    goto bad_format;
+			  break;
+
+			case 'n':
+			  value = translate_line_number (file, from);
+			  break;
+
+			default:
+			  goto bad_format;
+		      }
+		    /* Temporarily replace e.g. "%3dnx" with "%3d\0x".  */
+		    *speclim = 0;
+		    fprintf (out, spec - 1, value);	/* spec - 1 is the `%' itself */
+		    /* Undo the temporary replacement.  */
+		    *speclim = c;
+		  }
+		  continue;
+
+		bad_format:
+		  c = '%';	/* emit the `%' literally ... */
+		  f = spec;	/* ... and rescan what followed it as plain text */
+		  break;
+		}
+	    }
+	  putc (c, out);
+	}
+    }
+}
+
+/* Parse a character literal whose opening apostrophe has already been
+   consumed; LIT points at what follows it.  The literal is either a
+   single character or a backslash followed by one to three octal
+   digits, and must be closed by an apostrophe.  On success store the
+   value in *INTPTR and yield the address just past the closing
+   apostrophe; yield zero, leaving *INTPTR alone, if it is ill-formed.  */
+static char *
+scan_char_literal (lit, intptr)
+     char *lit;
+     int *intptr;
+{
+  char first = lit[0];
+  register char *p;
+  int value, digits;
+
+  /* An empty literal, or a string ending right here, is ill-formed.  */
+  if (first == 0 || first == '\'')
+    return 0;
+
+  if (first != '\\')
+    {
+      /* Plain character: the very next one must close the literal.  */
+      if (lit[1] != '\'')
+	return 0;
+      *intptr = first;
+      return lit + 2;
+    }
+
+  /* Octal escape: accumulate digits up to the closing apostrophe.  */
+  value = 0;
+  for (p = lit + 1; *p != '\''; p++)
+    {
+      unsigned digit = *p - '0';
+      if (8 <= digit)
+	return 0;
+      value = 8 * value + digit;
+    }
+
+  digits = p - (lit + 1);
+  if (digits < 1 || 3 < digits)
+    return 0;
+
+  *intptr = value;
+  return p + 1;
+}
+
+/* Recognize a printf-style conversion of the form
+   `-*[0-9]*(.[0-9]*)?[cdoxX]' starting at SPEC.  Yield the address of
+   the character after the conversion letter, or zero if SPEC does not
+   have that form.  */
+static char *
+scan_printf_spec (spec)
+     register char *spec;
+{
+  register unsigned char c;
+
+  /* Flags: any number of minus signs.  */
+  do
+    c = *spec++;
+  while (c == '-');
+
+  /* Field width.  */
+  for (; ISDIGIT (c); c = *spec++)
+    continue;
+
+  /* Optional precision.  */
+  if (c == '.')
+    while (ISDIGIT (c = *spec++))
+      continue;
+
+  /* The conversion letter decides whether the spec is acceptable.  */
+  if (c == 'c' || c == 'd' || c == 'o' || c == 'x' || c == 'X')
+    return spec;
+  return 0;
+}

+ 238 - 0
sys/src/ape/cmd/diff/install-sh

@@ -0,0 +1,238 @@
+#!/bin/sh
+#
+# install - install a program, script, or datafile
+# This comes from X11R5.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# `make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch.
+#
+
+
+# set DOITPROG to echo to test this script
+
+# Don't use :- since 4.3BSD and earlier shells don't like it.
+doit="${DOITPROG-}"
+
+
+# put in absolute paths if you don't have them in your path; or use env. vars.
+
+mvprog="${MVPROG-mv}"
+cpprog="${CPPROG-cp}"
+chmodprog="${CHMODPROG-chmod}"
+chownprog="${CHOWNPROG-chown}"
+chgrpprog="${CHGRPPROG-chgrp}"
+stripprog="${STRIPPROG-strip}"
+rmprog="${RMPROG-rm}"
+mkdirprog="${MKDIRPROG-mkdir}"
+
+# Clear the rename settings under the names the -t= and -b= options and
+# the install step actually use, so that values inherited from the
+# environment cannot leak into the destination file name.
+transformbasename=""
+transformarg=""
+instcmd="$mvprog"
+chmodcmd="$chmodprog 0755"
+chowncmd=""
+chgrpcmd=""
+stripcmd=""
+rmcmd="$rmprog -f"
+mvcmd="$mvprog"
+src=""
+dst=""
+dir_arg=""
+
+while [ x"$1" != x ]; do	# an empty or missing $1 ends the scan
+    case $1 in
+	-c) instcmd="$cpprog"	# copy the source instead of moving it
+	    shift
+	    continue;;
+	# -d: the operand is handled later as a directory to create
+	-d) dir_arg=true
+	    shift
+	    continue;;
+	# -m, -o and -g each consume the following word as their value
+	-m) chmodcmd="$chmodprog $2"
+	    shift
+	    shift
+	    continue;;
+
+	-o) chowncmd="$chownprog $2"
+	    shift
+	    shift
+	    continue;;
+
+	-g) chgrpcmd="$chgrpprog $2"
+	    shift
+	    shift
+	    continue;;
+
+	-s) stripcmd="$stripprog"
+	    shift
+	    continue;;
+	# -t=EXPR: EXPR is later handed to sed to rewrite the file name
+	-t=*) transformarg=`echo $1 | sed 's/-t=//'`
+	    shift
+	    continue;;
+	# -b=SUFFIX: SUFFIX is removed before, and re-added after, that sed
+	-b=*) transformbasename=`echo $1 | sed 's/-b=//'`
+	    shift
+	    continue;;
+	# anything else: first such word is the source, later ones the destination
+	*)  if [ x"$src" = x ]
+	    then
+		src=$1
+	    else
+		# this colon is to work around a 386BSD /bin/sh bug
+		:
+		dst=$1
+	    fi
+	    shift
+	    continue;;
+    esac
+done
+
+# At least one operand is required in every mode.
+if [ x"$src" = x ]
+then
+	echo "install:	no input file specified"
+	exit 1
+else
+	true
+fi
+
+if [ x"$dir_arg" != x ]; then
+	# -d: the single operand names the directory to create.
+	dst=$src
+	src=""
+	
+	if [ -d $dst ]; then
+		instcmd=:
+	else
+		# Honor the MKDIRPROG override here as well, the same way
+		# the parent-directory loop further down does.
+		instcmd=$mkdirprog
+	fi
+else
+
+# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
+# might cause directories to be created, which would be especially bad 
+# if $src (and thus $dsttmp) contains '*'.
+
+	if [ -f $src -o -d $src ]
+	then
+		true
+	else
+		echo "install:  $src does not exist"
+		exit 1
+	fi
+	
+	if [ x"$dst" = x ]
+	then
+		echo "install:	no destination specified"
+		exit 1
+	else
+		true
+	fi
+
+# If destination is a directory, append the input filename; if your system
+# does not like double slashes in filenames, you may need to add some logic
+
+	if [ -d $dst ]
+	then
+		dst="$dst"/`basename $src`
+	else
+		true
+	fi
+fi
+
+## this sed command emulates the dirname command: drop the last path component, then one trailing slash, and use "." if nothing is left
+dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+
+# Make sure that the destination directory exists.
+#  this part is taken from Noah Friedman's mkinstalldirs script
+
+# Skip lots of stat calls in the usual case.
+if [ ! -d "$dstdir" ]; then
+defaultIFS='	
+'
+IFS="${IFS-${defaultIFS}}"
+
+oIFS="${IFS}"
+# Some sh's can't handle IFS=/ for some reason.
+IFS='%'
+set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`	# split on slashes; a leading slash stays on the first word
+IFS="${oIFS}"
+
+pathcomp=''
+# Walk the components left to right, creating each missing prefix.
+while [ $# -ne 0 ] ; do
+	pathcomp="${pathcomp}${1}"
+	shift
+
+	if [ ! -d "${pathcomp}" ] ;
+        then
+		$mkdirprog "${pathcomp}"
+	else
+		true
+	fi
+
+	pathcomp="${pathcomp}/"
+done
+fi
+
+if [ x"$dir_arg" != x ]
+then
+	$doit $instcmd $dst &&
+	# Directory mode: apply owner, group, strip and mode to $dst itself.
+	if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
+	if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
+	if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
+	if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
+else
+
+# If we're going to rename the final executable, determine the name now.
+
+	if [ x"$transformarg" = x ] 
+	then
+		dstfile=`basename $dst`
+	else
+		dstfile=`basename $dst $transformbasename | 
+			sed $transformarg`$transformbasename
+	fi
+
+# don't allow the sed command to completely eliminate the filename
+
+	if [ x"$dstfile" = x ] 
+	then
+		dstfile=`basename $dst`
+	else
+		true
+	fi
+
+# Make a temp file name in the proper directory.
+
+	dsttmp=$dstdir/#inst.$$#
+
+# Move or copy the file name to the temp name
+
+	$doit $instcmd $src $dsttmp &&
+
+	trap "rm -f ${dsttmp}" 0 &&
+
+# and set any options; do chmod last to preserve setuid bits
+
+# If any of these fail, we abort the whole thing.  If we want to
+# ignore errors from any of these, just make sure not to ignore
+# errors from the above "$doit $instcmd $src $dsttmp" command.
+
+	if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
+	if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
+	if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
+	if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
+
+# Now rename the file to the real destination.
+
+	$doit $rmcmd -f $dstdir/$dstfile &&
+	$doit $mvcmd $dsttmp $dstdir/$dstfile 
+
+fi &&
+# "exit 0" is reached only when every step of the chain above succeeded.
+
+exit 0

+ 714 - 0
sys/src/ape/cmd/diff/io.c

@@ -0,0 +1,714 @@
+/* File I/O for GNU DIFF.
+   Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU DIFF; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#include "diff.h"
+
+/* Rotate a value n bits to the left.  UINT_BIT is the number of bits in an `unsigned'.  Both arguments are evaluated twice, and N must be greater than zero and less than UINT_BIT, since otherwise one of the two shifts is by the full width or more.  */
+#define UINT_BIT (sizeof (unsigned) * CHAR_BIT)
+#define ROL(v, n) ((v) << (n) | (v) >> (UINT_BIT - (n)))
+
+/* Given a hash value and a new character, return a new hash value: the old hash rotated left by 7 bits, plus the character. */
+#define HASH(h, c) ((c) + ROL (h, 7))
+
+/* Guess remaining number of lines from number N of lines so far,
+   size S so far, and total size T.  The remaining size T - S is
+   divided by an assumed line length: 32 while fewer than 10 lines
+   have been seen, S / (N - 1) after that.  5 is added to the result.  */
+#define GUESS_LINES(n,s,t) (((t) - (s)) / ((n) < 10 ? 32 : (s) / ((n)-1)) + 5)
+
+/* Type used for fast prefix comparison in find_identical_ends.  It may be overridden by defining `word' before this point.  */
+#ifndef word
+#define word int
+#endif
+
+/* Lines are put into equivalence classes (of lines that match in line_cmp).
+   Each equivalence class is represented by one of these structures,
+   but only while the classes are being computed.
+   Afterward, each class is represented by a number.  */
+struct equivclass
+{
+  int next;	/* Index in `equivs' of the next item in this bucket; 0 ends the chain. */
+  unsigned hash;	/* Hash of lines in this class.  */
+  char const *line;	/* A line that fits this class. */
+  size_t length;	/* That line's length, not counting its newline.  */
+};
+
+/* Hash-table: array of buckets, each being a chain of equivalence classes.
+   buckets[-1] is reserved for incomplete lines.  */
+static int *buckets;
+
+/* Number of buckets in the hash table array, not counting buckets[-1]. */
+static int nbuckets;
+
+/* Array in which the equivalence classes are allocated.
+   The bucket-chains go through the elements in this array.
+   The number of an equivalence class is its index in this array.  */
+static struct equivclass *equivs;
+
+/* Index of first free element in the array `equivs'.  */
+static int equivs_index;
+
+/* Number of elements allocated in the array `equivs'.  */
+static int equivs_alloc;
+
+static void find_and_hash_each_line PARAMS((struct file_data *));
+static void find_identical_ends PARAMS((struct file_data[]));
+static void prepare_text_end PARAMS((struct file_data *));
+
+/* Check for binary files and compare them for exact identity.  */
+
+/* Return 1 if BUF contains a non text character.
+   SIZE is the number of characters in BUF.  */
+
+#define binary_file_p(buf, size) (memchr (buf, '\0', size) != 0)
+
+/* Prepare CURRENT for reading: allocate its buffer and, unless
+   SKIP_TEST is nonzero, read a first bufferful and test it for binary
+   content.  The result is nonzero only when that test was made and
+   the data looked binary.  */
+
+int
+sip (current, skip_test)
+     struct file_data *current;
+     int skip_test;
+{
+  int have_file = ! (current->desc < 0);
+  size_t n;
+#if HAVE_SETMODE
+  int oldmode;
+#endif
+
+  /* A nonexistent file is treated as empty at this stage, so its
+     buffer only needs room for a sentinel.  */
+  if (have_file)
+    current->bufsize = STAT_BLOCKSIZE (current->stat);
+  else
+    current->bufsize = sizeof (word);
+  current->buffer = xmalloc (current->bufsize);
+
+  /* No file, or no test wanted: nothing has been buffered yet.  */
+  if (! have_file || skip_test)
+    {
+      current->buffered_chars = 0;
+      return 0;
+    }
+
+  /* Read the first part of the file and look at it.  */
+#if HAVE_SETMODE
+  oldmode = setmode (current->desc, O_BINARY);
+#endif
+  n = read (current->desc, current->buffer, current->bufsize);
+  if (n == -1)
+    pfatal_with_name (current->name);
+  current->buffered_chars = n;
+#if HAVE_SETMODE
+  /* The descriptor was not in binary mode before: seek back over what
+     was read, restore the old mode and forget the buffered data.  */
+  if (oldmode != O_BINARY)
+    {
+      if (lseek (current->desc, - (off_t) n, SEEK_CUR) == -1)
+	pfatal_with_name (current->name);
+      setmode (current->desc, oldmode);
+      current->buffered_chars = 0;
+    }
+#endif
+  return binary_file_p (current->buffer, n);
+}
+
+/* Read whatever remains of the current file into its buffer, growing
+   the buffer so that an appended newline and a sentinel also fit.  */
+
+void
+slurp (current)
+     struct file_data *current;
+{
+  size_t cc;
+
+  /* A nonexistent file has nothing to read.  */
+  if (current->desc < 0)
+    return;
+
+  if (S_ISREG (current->stat.st_mode))
+    {
+      /* Regular file: the stat block gives the size, so make the
+	 buffer big enough and read the remainder in one go.  */
+      cc = current->stat.st_size + 1 + sizeof (word);
+      if (current->bufsize < cc)
+	{
+	  current->bufsize = cc;
+	  current->buffer = xrealloc (current->buffer, cc);
+	}
+
+      if (current->buffered_chars < current->stat.st_size)
+	{
+	  cc = read (current->desc,
+		     current->buffer + current->buffered_chars,
+		     current->stat.st_size - current->buffered_chars);
+	  if (cc == -1)
+	    pfatal_with_name (current->name);
+	  current->buffered_chars += cc;
+	}
+      return;
+    }
+
+  /* Not a regular file: read it only if text treatment is forced or
+     something has been buffered already.  */
+  if (! always_text_flag && current->buffered_chars == 0)
+    return;
+
+  while (1)
+    {
+      /* Double the buffer whenever it is full.  */
+      if (current->buffered_chars == current->bufsize)
+	{
+	  current->bufsize *= 2;
+	  current->buffer = xrealloc (current->buffer, current->bufsize);
+	}
+      cc = read (current->desc,
+		 current->buffer + current->buffered_chars,
+		 current->bufsize - current->buffered_chars);
+      if (cc == 0)
+	break;
+      if (cc == -1)
+	pfatal_with_name (current->name);
+      current->buffered_chars += cc;
+    }
+
+  /* Resize to just the data plus the appended newline and sentinel.  */
+  current->bufsize = current->buffered_chars + 1 + sizeof (word);
+  current->buffer = xrealloc (current->buffer, current->bufsize);
+}
+
+/* Split the file into lines, simultaneously computing the equivalence class for
+   each line.
+
+   Only the text from CURRENT->prefix_end up to CURRENT->suffix_begin is
+   hashed.  For each such line its start address is stored in the line
+   table (linbuf) and the index of its equivalence class is stored in a
+   newly allocated array that ends up in CURRENT->equivs.  Classes are
+   looked up and created in the file-scope `equivs' table through the
+   `buckets' hash chains, which read_files sets up once for both files.
+
+   Afterwards the starts of lines in the identical suffix are recorded
+   as well (they are not hashed).
+
+   On return CURRENT->buffered_lines is the number of hashed lines and
+   CURRENT->valid_lines is the index of the last line start recorded.  */
+
+static void
+find_and_hash_each_line (current)
+     struct file_data *current;
+{
+  unsigned h;
+  unsigned char const *p = (unsigned char const *) current->prefix_end;
+  unsigned char c;
+  int i, *bucket;
+  size_t length;
+
+  /* Cache often-used quantities in local variables to help the compiler.  */
+  char const **linbuf = current->linbuf;
+  int alloc_lines = current->alloc_lines;
+  int line = 0;
+  int linbuf_base = current->linbuf_base;
+  int *cureqs = (int *) xmalloc (alloc_lines * sizeof (int));
+  struct equivclass *eqs = equivs;
+  int eqs_index = equivs_index;
+  int eqs_alloc = equivs_alloc;
+  char const *suffix_begin = current->suffix_begin;
+  char const *bufend = current->buffer + current->buffered_chars;
+  int use_line_cmp = ignore_some_line_changes;
+
+  while ((char const *) p < suffix_begin)
+    {
+      char const *ip = (char const *) p;
+
+      /* Compute the equivalence class for this line.  */
+
+      h = 0;
+
+      /* Hash this line until we find a newline.  The option flags select
+	 whether upper case is folded to lower case and whether white space
+	 is skipped entirely or collapsed to a single blank.  Every path
+	 below leaves P pointing just past the newline.  */
+      if (ignore_case_flag)
+	{
+	  if (ignore_all_space_flag)
+	    while ((c = *p++) != '\n')
+	      {
+		if (! ISSPACE (c))
+		  h = HASH (h, ISUPPER (c) ? tolower (c) : c);
+	      }
+	  else if (ignore_space_change_flag)
+	    while ((c = *p++) != '\n')
+	      {
+		if (ISSPACE (c))
+		  {
+		    for (;;)
+		      {
+			c = *p++;
+			if (!ISSPACE (c))
+			  break;
+			if (c == '\n')
+			  goto hashing_done;
+		      }
+		    h = HASH (h, ' ');
+		  }
+		/* C is now the first non-space.  A run of white space that
+		   reaches the newline never gets here, so trailing white
+		   space contributes nothing to the hash.  */
+		h = HASH (h, ISUPPER (c) ? tolower (c) : c);
+	      }
+	  else
+	    while ((c = *p++) != '\n')
+	      h = HASH (h, ISUPPER (c) ? tolower (c) : c);
+	}
+      else
+	{
+	  if (ignore_all_space_flag)
+	    while ((c = *p++) != '\n')
+	      {
+		if (! ISSPACE (c))
+		  h = HASH (h, c);
+	      }
+	  else if (ignore_space_change_flag)
+	    while ((c = *p++) != '\n')
+	      {
+		if (ISSPACE (c))
+		  {
+		    for (;;)
+		      {
+			c = *p++;
+			if (!ISSPACE (c))
+			  break;
+			if (c == '\n')
+			  goto hashing_done;
+		      }
+		    h = HASH (h, ' ');
+		  }
+		/* C is now the first non-space.  */
+		h = HASH (h, c);
+	      }
+	  else
+	    while ((c = *p++) != '\n')
+	      h = HASH (h, c);
+	}
+   hashing_done:;
+
+      bucket = &buckets[h % nbuckets];
+      length = (char const *) p - ip - 1;	/* Excludes the newline.  */
+
+      if ((char const *) p == bufend
+	  && current->missing_newline
+	  && ROBUST_OUTPUT_STYLE (output_style))
+	{
+	  /* This line is incomplete.  If this is significant,
+	     put the line into bucket[-1].  That slot exists because
+	     read_files advances `buckets' one element past the start
+	     of its allocation.  */
+	  if (! (ignore_space_change_flag | ignore_all_space_flag))
+	    bucket = &buckets[-1];
+
+	  /* Omit the inserted newline when computing linbuf later.
+	     Moving suffix_begin here also ends the enclosing loop.  */
+	  p--;
+	  bufend = suffix_begin = (char const *) p;
+	}
+
+      for (i = *bucket;  ;  i = eqs[i].next)
+	if (!i)
+	  {
+	    /* Index 0 ends a bucket chain, so no existing class matched.
+	       Create a new equivalence class in this bucket. */
+	    i = eqs_index++;
+	    if (i == eqs_alloc)
+	      eqs = (struct equivclass *)
+		      xrealloc (eqs, (eqs_alloc*=2) * sizeof(*eqs));
+	    eqs[i].next = *bucket;
+	    eqs[i].hash = h;
+	    eqs[i].line = ip;
+	    eqs[i].length = length;
+	    *bucket = i;
+	    break;
+	  }
+	else if (eqs[i].hash == h)
+	  {
+	    char const *eqline = eqs[i].line;
+
+	    /* Reuse existing equivalence class if the lines are identical.
+	       This detects the common case of exact identity
+	       faster than complete comparison would.  */
+	    if (eqs[i].length == length && memcmp (eqline, ip, length) == 0)
+	      break;
+
+	    /* Reuse existing class if line_cmp reports the lines equal.  */
+	    if (use_line_cmp && line_cmp (eqline, ip) == 0)
+	      break;
+	  }
+
+      /* Maybe increase the size of the line table.  The per-line class
+	 array is grown in step with it.  */
+      if (line == alloc_lines)
+	{
+	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
+	  alloc_lines = 2 * alloc_lines - linbuf_base;
+	  cureqs = (int *) xrealloc (cureqs, alloc_lines * sizeof (*cureqs));
+	  linbuf = (char const **) xrealloc (linbuf + linbuf_base,
+					     (alloc_lines - linbuf_base)
+					     * sizeof (*linbuf))
+		   - linbuf_base;
+	}
+      linbuf[line] = ip;
+      cureqs[line] = i;
+      ++line;
+    }
+
+  current->buffered_lines = line;
+
+  for (i = 0;  ;  i++)
+    {
+      /* Record the line start for lines in the suffix that we care about.
+	 Record one more line start than lines,
+	 so that we can compute the length of any buffered line.
+	 The loop stops at the end of the buffer, or after CONTEXT suffix
+	 lines when no_diff_means_no_output is set.  */
+      if (line == alloc_lines)
+	{
+	  /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
+	  alloc_lines = 2 * alloc_lines - linbuf_base;
+	  linbuf = (char const **) xrealloc (linbuf + linbuf_base,
+					     (alloc_lines - linbuf_base)
+					     * sizeof (*linbuf))
+		   - linbuf_base;
+	}
+      linbuf[line] = (char const *) p;
+
+      if ((char const *) p == bufend)
+	break;
+
+      if (context <= i && no_diff_means_no_output)
+	break;
+
+      line++;
+
+      while (*p++ != '\n')
+	;
+    }
+
+  /* Done with cache in local variables.  Write the possibly reallocated
+     tables back to CURRENT and to the file-scope equivalence table.  */
+  current->linbuf = linbuf;
+  current->valid_lines = line;
+  current->alloc_lines = alloc_lines;
+  current->equivs = cureqs;
+  equivs = eqs;
+  equivs_alloc = eqs_alloc;
+  equivs_index = eqs_index;
+}
+
+/* Terminate the text held in CURRENT's buffer.
+   When the text is nonempty and its last character is not a newline,
+   append one, bump CURRENT->buffered_chars and set
+   CURRENT->missing_newline so the addition is remembered; otherwise
+   clear that flag.  Finally zero the sizeof (word) bytes that follow
+   the text, so that planting or reading sentinels there never touches
+   uninitialized storage.  */
+
+static void
+prepare_text_end (current)
+     struct file_data *current;
+{
+  char *text = current->buffer;
+  size_t len = current->buffered_chars;
+  int lacks_newline = len != 0 && text[len - 1] != '\n';
+
+  if (lacks_newline)
+    {
+      text[len] = '\n';
+      len++;
+      current->buffered_chars = len;
+    }
+  current->missing_newline = lacks_newline;
+
+  /* Skip the clearing when there is no buffer at all.  */
+  if (text)
+    bzero (text + len, sizeof (word));
+}
+
+/* Given a vector of two file_data objects, find the identical
+   prefixes and suffixes of each object.
+
+   Both files are read completely with slurp (or share one buffer when
+   their descriptors are equal) and terminated by prepare_text_end.
+   The results are stored in each FILEVEC element: prefix_end and
+   suffix_begin bound the region that may differ, and linbuf,
+   linbuf_base, alloc_lines and prefix_lines describe a newly
+   allocated line table that already holds the starts of the prefix
+   lines that were kept.  */
+
+static void
+find_identical_ends (filevec)
+     struct file_data filevec[];
+{
+  word *w0, *w1;
+  char *p0, *p1, *buffer0, *buffer1;
+  char const *end0, *beg0;
+  char const **linbuf0, **linbuf1;
+  int i, lines;
+  size_t n0, n1, tem;
+  int alloc_lines0, alloc_lines1;
+  int buffered_prefix, prefix_count, prefix_mask;
+
+  slurp (&filevec[0]);
+  if (filevec[0].desc != filevec[1].desc)
+    slurp (&filevec[1]);
+  else
+    {
+      filevec[1].buffer = filevec[0].buffer;
+      filevec[1].bufsize = filevec[0].bufsize;
+      filevec[1].buffered_chars = filevec[0].buffered_chars;
+    }
+  for (i = 0; i < 2; i++)
+    prepare_text_end (&filevec[i]);
+
+  /* Find identical prefix.  */
+
+  p0 = buffer0 = filevec[0].buffer;
+  p1 = buffer1 = filevec[1].buffer;
+
+  n0 = filevec[0].buffered_chars;
+  n1 = filevec[1].buffered_chars;
+
+  if (p0 == p1)
+    /* The buffers are the same; sentinels won't work.  */
+    p0 = p1 += n1;
+  else
+    {
+      /* Insert end sentinels, in this case characters that are guaranteed
+	 to make the equality test false, and thus terminate the loop.
+	 The byte written is the first of the sizeof (word) bytes that
+	 prepare_text_end zeroed just past the text of the shorter file.  */
+
+      if (n0 < n1)
+	p0[n0] = ~p1[n0];
+      else
+	p1[n1] = ~p0[n1];
+
+      /* Loop until first mismatch, or to the sentinel characters.  */
+
+      /* Compare a word at a time for speed.
+	 NOTE(review): this reads the char buffers through `word *';
+	 it assumes both buffers are suitably aligned for that -- confirm.  */
+      w0 = (word *) p0;
+      w1 = (word *) p1;
+      while (*w0++ == *w1++)
+	;
+      --w0, --w1;
+
+      /* Do the last few bytes of comparison a byte at a time.  */
+      p0 = (char *) w0;
+      p1 = (char *) w1;
+      while (*p0++ == *p1++)
+	;
+      --p0, --p1;
+
+      /* Don't mistakenly count missing newline as part of prefix. */
+      if (ROBUST_OUTPUT_STYLE (output_style)
+	  && (buffer0 + n0 - filevec[0].missing_newline < p0)
+	     !=
+	     (buffer1 + n1 - filevec[1].missing_newline < p1))
+	--p0, --p1;
+    }
+
+  /* Now P0 and P1 point at the first nonmatching characters.  */
+
+  /* Skip back to last line-beginning in the prefix,
+     and then discard up to HORIZON_LINES lines from the prefix.  */
+  i = horizon_lines;
+  while (p0 != buffer0 && (p0[-1] != '\n' || i--))
+    --p0, --p1;
+
+  /* Record the prefix.  */
+  filevec[0].prefix_end = p0;
+  filevec[1].prefix_end = p1;
+
+  /* Find identical suffix.  */
+
+  /* P0 and P1 point beyond the last chars not yet compared.  */
+  p0 = buffer0 + n0;
+  p1 = buffer1 + n1;
+
+  if (! ROBUST_OUTPUT_STYLE (output_style)
+      || filevec[0].missing_newline == filevec[1].missing_newline)
+    {
+      end0 = p0;	/* Addr just past the last char in file 0.  */
+
+      /* Get value of P0 at which we should stop scanning backward:
+	 this is when either P0 or P1 points just past the last char
+	 of the identical prefix.  */
+      beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
+
+      /* Scan back until chars don't match or we reach that point.  */
+      while (p0 != beg0)
+	if (*--p0 != *--p1)
+	  {
+	    /* Point at the first char of the matching suffix.  */
+	    ++p0, ++p1;
+	    beg0 = p0;
+	    break;
+	  }
+
+      /* Are we at a line-beginning in both files?  If not, add the rest of
+	 this line to the main body.  Discard up to HORIZON_LINES lines from
+	 the identical suffix.  Also, discard one extra line,
+	 because shift_boundaries may need it.  */
+      i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
+			    &&
+			    (buffer1 == p1 || p1[-1] == '\n'));
+      while (i-- && p0 != end0)
+	while (*p0++ != '\n')
+	  ;
+
+      p1 += p0 - beg0;
+    }
+
+  /* Record the suffix.  */
+  filevec[0].suffix_begin = p0;
+  filevec[1].suffix_begin = p1;
+
+  /* Calculate number of lines of prefix to save.
+
+     prefix_count == 0 means save the whole prefix;
+     we need this for options like -D that output the whole file.
+     We also need it for options like -F that output some preceding line;
+     at least we will need to find the last few lines,
+     but since we don't know how many, it's easiest to find them all.
+
+     Otherwise, prefix_count != 0.  Save just prefix_count lines at start
+     of the line buffer; they'll be moved to the proper location later.
+     Handle 1 more line than the context says (because we count 1 too many),
+     rounded up to the next power of 2 to speed index computation.  */
+
+  if (no_diff_means_no_output && ! function_regexp_list)
+    {
+      for (prefix_count = 1;  prefix_count < context + 1;  prefix_count *= 2)
+	;
+      prefix_mask = prefix_count - 1;
+      alloc_lines0
+	= prefix_count
+	  + GUESS_LINES (0, 0, p0 - filevec[0].prefix_end)
+	  + context;
+    }
+  else
+    {
+      prefix_count = 0;
+      prefix_mask = ~0;
+      alloc_lines0 = GUESS_LINES (0, 0, n0);
+    }
+
+  lines = 0;
+  linbuf0 = (char const **) xmalloc (alloc_lines0 * sizeof (*linbuf0));
+
+  /* If the prefix is needed, find the prefix lines.
+     It is skipped only when no_diff_means_no_output is set and the
+     prefix and suffix meet in both files, i.e. nothing differs.  */
+  if (! (no_diff_means_no_output
+	 && filevec[0].prefix_end == p0
+	 && filevec[1].prefix_end == p1))
+    {
+      p0 = buffer0;
+      end0 = filevec[0].prefix_end;
+      while (p0 != end0)
+	{
+	  int l = lines++ & prefix_mask;	/* Wraps when prefix_count != 0.  */
+	  if (l == alloc_lines0)
+	    linbuf0 = (char const **) xrealloc (linbuf0, (alloc_lines0 *= 2)
+							 * sizeof(*linbuf0));
+	  linbuf0[l] = p0;
+	  while (*p0++ != '\n')
+	    ;
+	}
+    }
+  buffered_prefix = prefix_count && context < lines ? context : lines;
+
+  /* Allocate line buffer 1.  */
+  tem = prefix_count ? filevec[1].suffix_begin - buffer1 : n1;
+
+  alloc_lines1
+    = (buffered_prefix
+       + GUESS_LINES (lines, filevec[1].prefix_end - buffer1, tem)
+       + context);
+  linbuf1 = (char const **) xmalloc (alloc_lines1 * sizeof (*linbuf1));
+
+  if (buffered_prefix != lines)
+    {
+      /* Rotate prefix lines to proper location.
+	 linbuf1 serves as scratch space here; it is filled in
+	 properly by the next loop.  */
+      for (i = 0;  i < buffered_prefix;  i++)
+	linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
+      for (i = 0;  i < buffered_prefix;  i++)
+	linbuf0[i] = linbuf1[i];
+    }
+
+  /* Initialize line buffer 1 from line buffer 0.
+     The prefix is identical, so each line start sits at the same
+     offset from the beginning of its own buffer.  */
+  for (i = 0; i < buffered_prefix; i++)
+    linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
+
+  /* Record the line buffer, adjusted so that
+     linbuf*[0] points at the first differing line.  */
+  filevec[0].linbuf = linbuf0 + buffered_prefix;
+  filevec[1].linbuf = linbuf1 + buffered_prefix;
+  filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
+  filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
+  filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
+  filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
+}
+
+/* Largest primes less than some power of two, for nbuckets.  Values range
+   from useful to preposterous.  If one of these numbers isn't prime
+   after all, don't blame it on me, blame it on primes (6) . . .
+
+   The trailing 0 terminates the table; read_files aborts if its search
+   for a bucket count reaches it.  The entries above 32749 are compiled
+   in only when INT_MAX exceeds 32767.  */
+static int const primes[] =
+{
+  509,
+  1021,
+  2039,
+  4093,
+  8191,
+  16381,
+  32749,
+#if 32767 < INT_MAX
+  65521,
+  131071,
+  262139,
+  524287,
+  1048573,
+  2097143,
+  4194301,
+  8388593,
+  16777213,
+  33554393,
+  67108859,			/* Preposterously large . . . */
+  134217689,
+  268435399,
+  536870909,
+  1073741789,
+  2147483647,
+#endif
+  0
+};
+
+/* Read both files described by FILEVEC and build the table of
+   equivalence classes for their lines.
+
+   Returns 1, without building anything, if either file appears to be
+   binary or if PRETEND_BINARY is nonzero.  Otherwise returns 0 after
+   storing the number of equivalence classes in both elements'
+   equiv_max.  The class table and the hash buckets are scratch storage:
+   both are released before returning.  */
+
+int
+read_files (filevec, pretend_binary)
+     struct file_data filevec[];
+     int pretend_binary;
+{
+  struct file_data *f0 = &filevec[0];
+  struct file_data *f1 = &filevec[1];
+  int no_probe = always_text_flag | pretend_binary;
+  int binary = pretend_binary | sip (f0, no_probe);
+  int slot;
+
+  if (f0->desc == f1->desc)
+    {
+      /* Same descriptor: the second file shares the first one's buffer.  */
+      f1->buffer = f0->buffer;
+      f1->bufsize = f0->bufsize;
+      f1->buffered_chars = f0->buffered_chars;
+    }
+  else
+    binary |= sip (f1, no_probe | binary);
+
+  if (binary)
+    {
+#if HAVE_SETMODE
+      setmode (f0->desc, O_BINARY);
+      setmode (f1->desc, O_BINARY);
+#endif
+      return 1;
+    }
+
+  find_identical_ends (filevec);
+
+  /* Equivalence class 0 is permanently safe for lines that were not
+     hashed.  Real equivalence classes start at 1. */
+  equivs_alloc = f0->alloc_lines + f1->alloc_lines + 1;
+  equivs = (struct equivclass *) xmalloc (equivs_alloc * sizeof (*equivs));
+  equivs_index = 1;
+
+  /* Pick the first tabulated prime that is at least a third of the
+     class capacity; running into the 0 terminator is fatal.  */
+  slot = 0;
+  while (primes[slot] < equivs_alloc / 3)
+    {
+      if (primes[slot] == 0)
+	abort ();
+      slot++;
+    }
+  nbuckets = primes[slot];
+
+  /* One extra, zeroed slot is kept in front of the table so that
+     buckets[-1] can be used.  */
+  buckets = (int *) xmalloc ((nbuckets + 1) * sizeof (*buckets));
+  bzero (buckets, (nbuckets + 1) * sizeof (*buckets));
+  buckets++;
+
+  find_and_hash_each_line (f0);
+  find_and_hash_each_line (f1);
+
+  f1->equiv_max = equivs_index;
+  f0->equiv_max = equivs_index;
+
+  free (equivs);
+  free (buckets - 1);
+
+  return 0;
+}

+ 50 - 0
sys/src/ape/cmd/diff/mkfile

@@ -0,0 +1,50 @@
+MKSHELL=rc
+APE=$NXM/sys/src/ape
+<$APE/config
+
+OFILES=
+HFILES=\
+	system.h\
+	config.h\
+
+TARG=diff diff3
+
+DIFFO=\
+	analyze.$O\
+	cmpbuf.$O\
+	dir.$O\
+	io.$O\
+	util.$O\
+	context.$O\
+	ed.$O\
+	ifdef.$O\
+	normal.$O\
+	side.$O\
+	fnmatch.$O\
+	getopt.$O\
+	getopt1.$O\
+	regex.$O\
+	version.$O\
+	prepend_args.$O\
+
+DIFF3O=\
+	getopt.$O\
+	getopt1.$O\
+	version.$O\
+
+SDIFFO=\
+	getopt.$O\
+	getopt1.$O\
+	version.$O\
+
+BIN=$NXM/$objtype/bin/ape
+
+<$NXM/sys/src/cmd/mkmany
+
+LD=$9LD
+LDFLAGS=$9LDFLAGS
+CFLAGS=-c $CFLAGS -B -p -D_POSIX_SOURCE -DREGEX_MALLOC -I. \
+	-DHAVE_CONFIG_H -DDIFF_PROGRAM="/bin/ape/diff" \
+
+$O.diff: $DIFFO
+$O.diff3: $DIFF3O

+ 71 - 0
sys/src/ape/cmd/diff/normal.c

@@ -0,0 +1,71 @@
+/* Normal-format output routines for GNU DIFF.
+   Copyright (C) 1988, 1989, 1993 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU DIFF; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+
+#include "diff.h"
+
+static void print_normal_hunk PARAMS((struct change *));
+
+/* Print the edit-script SCRIPT as a normal diff.
+   The work is delegated to print_script, which is handed find_change
+   and print_normal_hunk as its other two arguments.  */
+
+void
+print_normal_script (script)
+     struct change *script;
+{
+  print_script (script, find_change, print_normal_hunk);
+}
+
+/* Emit one hunk of a normal-format diff.
+   HUNK is a contiguous piece of the edit script covering changes in
+   consecutive lines.  Nothing is printed when analyze_hunk reports
+   neither deletions nor insertions.  Otherwise the output is a header
+   (line range in file 0, change letter, line range in file 1), then
+   the deleted lines marked "<", a "---" separator if the hunk both
+   deletes and inserts, and the inserted lines marked ">".  */
+
+static void
+print_normal_hunk (hunk)
+     struct change *hunk;
+{
+  int lo0, hi0, lo1, hi1, ndeleted, ninserted;
+  int n;
+
+  /* Find the line ranges the hunk covers in each file.  */
+  analyze_hunk (hunk, &lo0, &hi0, &lo1, &hi1, &ndeleted, &ninserted);
+  if (! (ndeleted || ninserted))
+    return;
+
+  begin_output ();
+
+  /* Header line for this hunk.  */
+  print_number_range (',', &files[0], lo0, hi0);
+  fprintf (outfile, "%c", change_letter (ninserted, ndeleted));
+  print_number_range (',', &files[1], lo1, hi1);
+  fprintf (outfile, "\n");
+
+  if (ndeleted)
+    {
+      /* Lines that only the first file has.  */
+      n = lo0;
+      while (n <= hi0)
+	{
+	  print_1_line ("<", &files[0].linbuf[n]);
+	  n++;
+	}
+
+      /* A change has both halves; separate them.  */
+      if (ninserted)
+	fprintf (outfile, "---\n");
+    }
+
+  if (ninserted)
+    {
+      /* Lines that only the second file has.  */
+      n = lo1;
+      while (n <= hi1)
+	{
+	  print_1_line (">", &files[1].linbuf[n]);
+	  n++;
+	}
+    }
+}

+ 87 - 0
sys/src/ape/cmd/diff/prepend_args.c

@@ -0,0 +1,87 @@
+/* prepend_args.c - utility programs for manipulating argv[]
+   Copyright (C) 1999 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+   02111-1307, USA.  */
+
+/* $FreeBSD: src/contrib/diff/prepend_args.c,v 1.1 1999/11/26 02:51:44 obrien Exp $ */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include "system.h"
+#include "prepend_args.h"
+#include "diff.h"
+
+
+/* Split OPTIONS into white-space-separated words.
+   Each word is copied into BUF, followed by a terminating null, and
+   when ARGV is not NULL, ARGV[0], ARGV[1], ... are pointed at the
+   successive copies.  ARGV[N] is left untouched.  A backslash makes
+   the character after it (white space or another backslash included)
+   an ordinary part of the word; a backslash at the very end of
+   OPTIONS is copied literally.  Returns N, the number of words.  */
+static int
+prepend_args (options, buf, argv)
+     char const *options;
+     char *buf;
+     char **argv;
+{
+  char const *src = options;
+  char *dst = buf;
+  int count = 0;
+
+  while (1)
+    {
+      /* Step over the separators in front of the next word.  */
+      for (; ISSPACE ((unsigned char) *src); src++)
+	continue;
+      if (*src == '\0')
+	break;
+
+      if (argv != NULL)
+	argv[count] = dst;
+      count++;
+
+      /* Copy one word, resolving backslash escapes on the way.  */
+      do
+	{
+	  char ch = *src++;
+	  if (ch == '\\' && *src != '\0')
+	    ch = *src++;
+	  *dst++ = ch;
+	}
+      while (*src != '\0' && ! ISSPACE ((unsigned char) *src));
+
+      *dst++ = '\0';
+    }
+
+  return count;
+}
+
+/* Prepend the whitespace-separated options in OPTIONS to the argument
+   vector of a main program with argument count *PARGC and argument
+   vector *PARGV.
+
+   On return *PARGV points at a newly allocated, null-terminated vector
+   laid out as: the original (*PARGV)[0], the words of OPTIONS, then the
+   remaining original arguments; *PARGC is increased by the number of
+   words.  The option words live in a second allocation.  Neither
+   allocation is released here, because the new vector refers to both,
+   and the original vector is left alone.
+
+   Nothing is done when OPTIONS is NULL.  Nothing is done either when
+   *PARGC is less than 1: with no program name in slot 0 the copy below
+   would treat the terminating null as the program name, then read past
+   the end of the old vector and write past the end of the new one.  */
+void
+prepend_default_options (options, pargc, pargv)
+     char const *options;
+     int *pargc;
+     char ***pargv;
+{
+  if (options && 0 < *pargc)
+    {
+      /* The copies can never be longer than OPTIONS itself.  */
+      char *buf = xmalloc (strlen (options) + 1);
+      /* First pass only counts the words (ARGV is NULL).  */
+      int prepended = prepend_args (options, buf, (char **) NULL);
+      int argc = *pargc;
+      char * const *argv = *pargv;
+      char **pp = (char **) xmalloc ((prepended + argc + 1) * sizeof *pp);
+      *pargc = prepended + argc;
+      *pargv = pp;
+      /* Program name first, then the options, then everything else up
+	 to and including the terminating null pointer.  */
+      *pp++ = *argv++;
+      pp += prepend_args (options, buf, pp);
+      while ((*pp++ = *argv++))
+	continue;
+    }
+}

+ 21 - 0
sys/src/ape/cmd/diff/prepend_args.h

@@ -0,0 +1,21 @@
+/* prepend_args.h - utility programs for manipulating argv[]
+   Copyright (C) 1999 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+   02111-1307, USA.  */
+
+/* $FreeBSD: src/contrib/diff/prepend_args.h,v 1.1 1999/11/26 02:51:44 obrien Exp $ */
+
+void prepend_default_options PARAMS ((char const *, int *, char ***));

+ 6374 - 0
sys/src/ape/cmd/diff/regex.c

@@ -0,0 +1,6374 @@
+/* Extended regular expression matching and search library, version
+   0.12.  (Implements POSIX draft P10003.2/D11.2, except for
+   internationalization features.)
+
+   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+   USA.	 */
+
+/* AIX requires this to be the first thing in the file. */
+#if defined (_AIX) && !defined (REGEX_MALLOC)
+  #pragma alloca
+#endif
+
+#undef	_GNU_SOURCE
+#define _GNU_SOURCE
+
+#ifdef emacs
+/* Converts the pointer to the char to BEG-based offset from the start.	 */
+#define PTR_TO_OFFSET(d)						\
+	POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING			\
+			  ? (d) - string1 : (d) - (string2 - size1))
+#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
+#else
+#define PTR_TO_OFFSET(d) 0
+#endif
+
+#include "config.h"
+
+/* We need this for `regex.h', and perhaps for the Emacs include files.	 */
+#include <sys/types.h>
+
+/* This is for other GNU distributions with internationalized messages.	 */
+#if HAVE_LIBINTL_H || defined (_LIBC)
+# include <libintl.h>
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+   strings.  */
+#define gettext_noop(String) String
+#endif
+
+/* The `emacs' switch turns on certain matching commands
+   that make sense only in Emacs. */
+#ifdef emacs
+
+#include "lisp.h"
+#include "buffer.h"
+
+/* Make syntax table lookup grant data in gl_state.  */
+#define SYNTAX_ENTRY_VIA_PROPERTY
+
+#include "syntax.h"
+#include "charset.h"
+#include "category.h"
+
+#define malloc xmalloc
+#define realloc xrealloc
+#define free xfree
+
+#else  /* not emacs */
+
+/* If we are not linking with Emacs proper,
+   we can't use the relocating allocator
+   even if config.h says that we can.  */
+#undef REL_ALLOC
+
+#if defined (STDC_HEADERS) || defined (_LIBC)
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
+   If nothing else has been done, use the method below.	 */
+#ifdef INHIBIT_STRING_HEADER
+#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
+#if !defined (bzero) && !defined (bcopy)
+#undef INHIBIT_STRING_HEADER
+#endif
+#endif
+#endif
+
+/* This is the normal way of making sure we have a bcopy and a bzero.
+   This is used in most programs--a few other programs avoid this
+   by defining INHIBIT_STRING_HEADER.  */
+#ifndef INHIBIT_STRING_HEADER
+#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
+#include <string.h>
+#ifndef bcmp
+#define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
+#endif
+#ifndef bcopy
+#define bcopy(s, d, n)	memcpy ((d), (s), (n))
+#endif
+#ifndef bzero
+#define bzero(s, n)	memset ((s), 0, (n))
+#endif
+#else
+#include <strings.h>
+#endif
+#endif
+
+/* Define the syntax stuff for \<, \>, etc.  */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+   commands in re_match_2.  */
+#ifndef Sword
+#define Sword 1
+#endif
+
+#ifdef SWITCH_ENUM_BUG
+#define SWITCH_ENUM_CAST(x) ((int)(x))
+#else
+#define SWITCH_ENUM_CAST(x) (x)
+#endif
+
+#ifdef SYNTAX_TABLE
+
+extern char *re_syntax_table;
+
+#else /* not SYNTAX_TABLE */
+
+/* How many characters in the character set.  */
+#define CHAR_SET_SIZE 256
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+/* Fill in re_syntax_table the first time it is called; later calls
+   return immediately.  Entries for the characters in the ranges
+   'a'..'z', 'A'..'Z' and '0'..'9', and for '_', are set to Sword;
+   every other entry is zero.  */
+static void
+init_syntax_once ()
+{
+   static int initialized = 0;
+   int ch;
+
+   if (initialized)
+     return;
+
+   for (ch = 0; ch < CHAR_SET_SIZE; ch++)
+     {
+       int is_word = ((ch >= 'a' && ch <= 'z')
+		      || (ch >= 'A' && ch <= 'Z')
+		      || (ch >= '0' && ch <= '9')
+		      || ch == '_');
+
+       re_syntax_table[ch] = is_word ? Sword : 0;
+     }
+
+   initialized = 1;
+}
+
+#endif /* not SYNTAX_TABLE */
+
+#define SYNTAX(c) re_syntax_table[c]
+
+/* Dummy macros for non-Emacs environments.  */
+#define BASE_LEADING_CODE_P(c) (0)
+#define WORD_BOUNDARY_P(c1, c2) (0)
+#define CHAR_HEAD_P(p) (1)
+#define SINGLE_BYTE_CHAR_P(c) (1)
+#define SAME_CHARSET_P(c1, c2) (1)
+#define MULTIBYTE_FORM_LENGTH(p, s) (1)
+#define STRING_CHAR(p, s) (*(p))
+#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
+#define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
+  (c = ((p) == (end1) ? *(str2) : *(p)))
+#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
+  (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
+#endif /* not emacs */
+
+/* Get the interface, including the syntax bits.  */
+#include "regex.h"
+
+/* isalpha etc. are used for the character classes.  */
+#include <ctype.h>
+
+/* Jim Meyering writes:
+
+   "... Some ctype macros are valid only for character codes that
+   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
+   using /bin/cc or gcc but without giving an ansi option).  So, all
+   ctype uses should be through macros like ISPRINT...	If
+   STDC_HEADERS is defined, then autoconf has verified that the ctype
+   macros don't need to be guarded with references to isascii. ...
+   Defining isascii to 1 should let any compiler worth its salt
+   eliminate the && through constant folding."	*/
+
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
+#define ISASCII(c) 1
+#else
+#define ISASCII(c) isascii(c)
+#endif
+
+#ifdef isblank
+#define ISBLANK(c) (ISASCII (c) && isblank (c))
+#else
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+#else
+#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+#endif
+
+#define ISPRINT(c) (ISASCII (c) && isprint (c))
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISALNUM(c) (ISASCII (c) && isalnum (c))
+#define ISALPHA(c) (ISASCII (c) && isalpha (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISLOWER(c) (ISASCII (c) && islower (c))
+#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+#define ISSPACE(c) (ISASCII (c) && isspace (c))
+#define ISUPPER(c) (ISASCII (c) && isupper (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+#ifndef NULL
+#define NULL (void *)0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+   since ours (we hope) works properly with all combinations of
+   machines, compilers, `char' and `unsigned char' argument types.
+   (Per Bothner suggested the basic approach.)	*/
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else  /* not __STDC__ */
+/* As in Harbison and Steele.  */
+#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
+/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
+   use `alloca' instead of `malloc'.  This is because using malloc in
+   re_search* or re_match* could cause memory leaks when C-g is used in
+   Emacs; also, malloc is slower and causes storage fragmentation.  On
+   the other hand, malloc is more portable, and easier to debug.
+
+   Because we sometimes use alloca, some routines have to be macros,
+   not functions -- `alloca'-allocated space disappears at the end of the
+   function it is called in.  */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE malloc
+#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+#define REGEX_FREE free
+
+#else /* not REGEX_MALLOC  */
+
+/* Emacs already defines alloca, sometimes.  */
+#ifndef alloca
+
+/* Make alloca work the best possible way.  */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#if 0 /* It is a bad idea to declare alloca.  We always cast the result.  */
+#ifndef _AIX /* Already did AIX, up at the top.	 */
+char *alloca ();
+#endif /* not _AIX */
+#endif
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#endif /* not alloca */
+
+#define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable.  */
+#define REGEX_REALLOCATE(source, osize, nsize)				\
+  (destination = (char *) alloca (nsize),				\
+   bcopy (source, destination, osize),					\
+   destination)
+
+/* No need to do anything to free, after alloca.  */
+#define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
+
+#endif /* not REGEX_MALLOC */
+
+/* Define how to allocate the failure stack.  */
+
+#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
+
+#define REGEX_ALLOCATE_STACK(size)				\
+  r_alloc (&failure_stack_ptr, (size))
+#define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
+  r_re_alloc (&failure_stack_ptr, (nsize))
+#define REGEX_FREE_STACK(ptr)					\
+  r_alloc_free (&failure_stack_ptr)
+
+#else /* not using relocating allocator */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE_STACK malloc
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
+#define REGEX_FREE_STACK free
+
+#else /* not REGEX_MALLOC */
+
+#define REGEX_ALLOCATE_STACK alloca
+
+#define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
+   REGEX_REALLOCATE (source, osize, nsize)
+/* No need to explicitly free anything.	 */
+#define REGEX_FREE_STACK(arg)
+
+#endif /* not REGEX_MALLOC */
+#endif /* not using relocating allocator */
+
+
+/* True if `size1' is non-NULL and PTR is pointing anywhere inside
+   `string1' or just past its end.  This works if PTR is NULL, which is
+   a good thing.  The macro refers to `size1' and `string1' by name, so
+   both must be in scope wherever it is used, and PTR is evaluated
+   twice.  */
+#define FIRST_STRING_P(ptr)					\
+  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc, or fail.
+   The macros themselves do no failure check; the caller has to test
+   the resulting pointer.  RETALLOC stores the realloc result straight
+   into ADDR, so the caller needs its own copy of the old pointer if it
+   must survive a failed call.  RETALLOC_IF expands to a bare if/else
+   statement rather than an expression, so it is only safe as a
+   complete statement of its own.  REGEX_TALLOC allocates through
+   REGEX_ALLOCATE instead of malloc.  */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+#define RETALLOC_IF(addr, n, t) \
+  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits.	 */
+
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+#undef MAX
+#undef MIN
+#define MAX(a, b) ((a) > (b) ? (a) : (b)) /* Evaluates an argument twice.  */
+#define MIN(a, b) ((a) < (b) ? (a) : (b)) /* Evaluates an argument twice.  */
+
+typedef char boolean;
+#define false 0
+#define true 1
+
+static int re_match_2_internal (); /* Declared without a prototype.  */
+
+/* These are the command codes that appear in compiled regular
+   expressions.  Some opcodes are followed by argument bytes.  A
+   command code can specify any interpretation whatsoever for its
+   arguments.  Zero bytes may appear in the compiled regular expression.
+   The enumerators are numbered consecutively starting from no_op = 0;
+   the trailing group is present only when `emacs' is defined.  */
+
+typedef enum
+{
+  no_op = 0,
+
+  /* Succeed right away--no more backtracking.  */
+  succeed,
+
+	/* Followed by one byte giving n, then by n literal bytes.  */
+  exactn,
+
+	/* Matches any (more or less) character.  */
+  anychar,
+
+	/* Matches any one char belonging to specified set.  First
+	   following byte is number of bitmap bytes.  Then come bytes
+	   for a bitmap saying which chars are in.  Bits in each byte
+	   are ordered low-bit-first.  A character is in the set if its
+	   bit is 1.  A character too large to have a bit in the map is
+	   automatically not in the set.  */
+  charset,
+
+	/* Same parameters as charset, but match any character that is
+	   not one of those specified.  */
+  charset_not,
+
+	/* Start remembering the text that is matched, for storing in a
+	   register.  Followed by one byte with the register number, in
+	   the range 0 to one less than the pattern buffer's re_nsub
+	   field.  Then followed by one byte with the number of groups
+	   inner to this one.  (This last has to be part of the
+	   start_memory only because we need it in the on_failure_jump
+	   of re_match_2.)  */
+  start_memory,
+
+	/* Stop remembering the text that is matched and store it in a
+	   memory register.  Followed by one byte with the register
+	   number, in the range 0 to one less than `re_nsub' in the
+	   pattern buffer, and one byte with the number of inner groups,
+	   just like `start_memory'.  (We need the number of inner
+	   groups here because we don't have any easy way of finding the
+	   corresponding start_memory when we're at a stop_memory.)  */
+  stop_memory,
+
+	/* Match a duplicate of something remembered.  Followed by one
+	   byte containing the register number.  */
+  duplicate,
+
+	/* Fail unless at beginning of line.  */
+  begline,
+
+	/* Fail unless at end of line.  */
+  endline,
+
+	/* Succeeds if at beginning of buffer (if emacs) or at beginning
+	   of string to be matched (if not).  */
+  begbuf,
+
+	/* Analogously, for end of buffer/string.  */
+  endbuf,
+
+	/* Followed by two-byte relative address to which to jump.  */
+  jump,
+
+	/* Same as jump, but marks the end of an alternative.  */
+  jump_past_alt,
+
+	/* Followed by two-byte relative address of place to resume at
+	   in case of failure.  */
+  on_failure_jump,
+
+	/* Like on_failure_jump, but pushes a placeholder instead of the
+	   current string position when executed.  */
+  on_failure_keep_string_jump,
+
+	/* Throw away latest failure point and then jump to following
+	   two-byte relative address.  */
+  pop_failure_jump,
+
+	/* Change to pop_failure_jump if we know we won't have to
+	   backtrack to match; otherwise change to jump.  This is used
+	   to jump back to the beginning of a repeat.  If what follows
+	   this jump clearly won't match what the repeat does, such that
+	   we can be sure that there is no use backtracking out of
+	   repetitions already matched, then we change it to a
+	   pop_failure_jump.  Followed by two-byte address.  */
+  maybe_pop_jump,
+
+	/* Jump to following two-byte address, and push a dummy failure
+	   point.  This failure point will be thrown away if an attempt
+	   is made to use it for a failure.  A `+' construct makes this
+	   before the first repeat.  Also used as an intermediary kind
+	   of jump when compiling an alternative.  */
+  dummy_failure_jump,
+
+	/* Push a dummy failure point and continue.  Used at the end of
+	   alternatives.  */
+  push_dummy_failure,
+
+	/* Followed by two-byte relative address and two-byte number n.
+	   After matching N times, jump to the address upon failure.  */
+  succeed_n,
+
+	/* Followed by two-byte relative address, and two-byte number n.
+	   Jump to the address N times, then fail.  */
+  jump_n,
+
+	/* Set the following two-byte relative address to the
+	   subsequent two-byte number.  The address *includes* the two
+	   bytes of number.  */
+  set_number_at,
+
+  wordchar,	/* Matches any word-constituent character.  */
+  notwordchar,	/* Matches any char that is not a word-constituent.  */
+
+  wordbeg,	/* Succeeds if at word beginning.  */
+  wordend,	/* Succeeds if at word end.  */
+
+  wordbound,	/* Succeeds if at a word boundary.  */
+  notwordbound	/* Succeeds if not at a word boundary.  */
+
+#ifdef emacs
+  ,before_dot,	/* Succeeds if before point.  */
+  at_dot,	/* Succeeds if at point.  */
+  after_dot,	/* Succeeds if after point.  */
+
+	/* Matches any character whose syntax is specified.  Followed by
+	   a byte which contains a syntax code, e.g., Sword.  */
+  syntaxspec,
+
+	/* Matches any character whose syntax is not that specified.  */
+  notsyntaxspec,
+
+  /* Matches any character whose category-set contains the specified
+     category.  The operator is followed by a byte which contains a
+     category code (mnemonic ASCII character).  */
+  categoryspec,
+
+  /* Matches any character whose category-set does not contain the
+     specified category.  The operator is followed by a byte which
+     contains the category code (mnemonic ASCII character).  */
+  notcategoryspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern.  */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION: the low
+   eight bits go into the first byte, NUMBER >> 8 into the second.  */
+
+#define STORE_NUMBER(destination, number)				\
+  do {									\
+    (destination)[0] = (number) & 0377;					\
+    (destination)[1] = (number) >> 8;					\
+  } while (0)
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+   the byte after where the number is stored.  Therefore, DESTINATION
+   must be an lvalue.  */
+
+#define STORE_NUMBER_AND_INCR(destination, number)			\
+  do {									\
+    STORE_NUMBER (destination, number);					\
+    (destination) += 2;							\
+  } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+   at SOURCE.  The first byte supplies the low eight bits; the second
+   byte is passed through SIGN_EXTEND_CHAR before being shifted up, so
+   the result can be negative.  DESTINATION is assigned and then added
+   to, so it must be an lvalue.  */
+
+#define EXTRACT_NUMBER(destination, source)				\
+  do {									\
+    (destination) = *(source) & 0377;					\
+    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
+  } while (0)
+
+#ifdef DEBUG
+/* Function form of EXTRACT_NUMBER, compiled only under DEBUG: decode
+   the two-byte number at SOURCE into *DEST.  The second byte is
+   sign-extended and forms the high part; the first byte is the low
+   eight bits.  */
+static void
+extract_number (dest, source)
+    int *dest;
+    unsigned char *source;
+{
+  int high_part = SIGN_EXTEND_CHAR (source[1]);
+  int low_part = source[0] & 0377;
+
+  *dest = low_part + (high_part << 8);
+}
+
+#ifndef EXTRACT_MACROS /* To debug the macros.	*/
+#undef EXTRACT_NUMBER
+#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number
+   (i.e. by two bytes).  SOURCE must be an lvalue.  */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
+  do {									\
+    EXTRACT_NUMBER (destination, source);				\
+    (source) += 2;							\
+  } while (0)
+
+#ifdef DEBUG
+/* Function form of EXTRACT_NUMBER_AND_INCR, compiled only under DEBUG:
+   decode the two-byte number at *SOURCE into *DESTINATION, then step
+   *SOURCE past those two bytes.  */
+static void
+extract_number_and_incr (destination, source)
+    int *destination;
+    unsigned char **source;
+{
+  extract_number (destination, source[0]);
+  source[0] = source[0] + 2;
+}
+
+#ifndef EXTRACT_MACROS
+#undef EXTRACT_NUMBER_AND_INCR
+#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+  extract_number_and_incr (&dest, &src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* Store a multibyte character in three contiguous bytes starting at
+   DESTINATION, lowest-order byte first, and increment DESTINATION to
+   the byte after where the character is stored.  Therefore,
+   DESTINATION must be an lvalue.  */
+
+#define STORE_CHARACTER_AND_INCR(destination, character)	\
+  do {								\
+    (destination)[0] = (character) & 0377;			\
+    (destination)[1] = ((character) >> 8) & 0377;		\
+    (destination)[2] = (character) >> 16;			\
+    (destination) += 3;						\
+  } while (0)
+
+/* Put into DESTINATION a character stored in three contiguous bytes
+   starting at SOURCE (the inverse of the byte layout written by
+   STORE_CHARACTER_AND_INCR).  SOURCE is not advanced.  */
+
+#define EXTRACT_CHARACTER(destination, source)	\
+  do {						\
+    (destination) = ((source)[0]		\
+		     | ((source)[1] << 8)	\
+		     | ((source)[2] << 16));	\
+  } while (0)
+
+
+/* Macros for charset. */
+
+/* Size of bitmap of charset P in bytes.  P is a start of charset,
+   i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.
+   Only the low seven bits of the byte after the opcode hold the size.  */
+#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
+
+/* Nonzero if charset P has range table (top bit of the size byte).  */
+#define CHARSET_RANGE_TABLE_EXISTS_P(p)	 ((p)[1] & 0x80)
+
+/* Return the address of the range table of charset P.  This is not the
+   start of the range entries themselves but the position just before
+   them, where the number of ranges is stored.  `2 +' means to skip the
+   re_opcode_t and the size-of-bitmap byte.  */
+#define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)])
+
+/* Test if C is listed in the bitmap of charset P.  A C at or beyond
+   the number of bits in the bitmap is reported as not listed.  */
+#define CHARSET_LOOKUP_BITMAP(p, c)				\
+  ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH			\
+   && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
+
+/* Return the address of end of RANGE_TABLE.  COUNT is number of
+   ranges (each of which is a pair of (start, end)) in the RANGE_TABLE.
+   `* 2' accounts for the start and the end of each range.  `* 3' is
+   the size in bytes of each start and each end.  */
+#define CHARSET_RANGE_TABLE_END(range_table, count)	\
+  ((range_table) + (count) * 2 * 3)
+
+/* Test if C is in RANGE_TABLE.  The flag NOT is negated (once) if C
+   lies within any range; otherwise NOT is left unchanged, so NOT must
+   be an lvalue.  COUNT is number of ranges in RANGE_TABLE.  Each range
+   is read as two three-byte characters (start, then end), and both
+   bounds are inclusive.  The expansion declares locals named
+   `range_start', `range_end', `p' and `range_table_end'.  */
+#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)	\
+  do									\
+    {									\
+      int range_start, range_end;					\
+      unsigned char *p;							\
+      unsigned char *range_table_end					\
+	= CHARSET_RANGE_TABLE_END ((range_table), (count));		\
+									\
+      for (p = (range_table); p < range_table_end; p += 2 * 3)		\
+	{								\
+	  EXTRACT_CHARACTER (range_start, p);				\
+	  EXTRACT_CHARACTER (range_end, p + 3);				\
+									\
+	  if (range_start <= (c) && (c) <= range_end)			\
+	    {								\
+	      (not) = !(not);						\
+	      break;							\
+	    }								\
+	}								\
+    }									\
+  while (0)
+
+/* Test if C is in range table of CHARSET.  The flag NOT is negated if
+   C is listed in it.  The two-byte range count stored at
+   CHARSET_RANGE_TABLE (CHARSET) is read first, and the ranges that
+   follow it are then searched with CHARSET_LOOKUP_RANGE_TABLE_RAW.  */
+#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)			\
+  do									\
+    {									\
+      /* Number of ranges in range table. */				\
+      int count;							\
+      unsigned char *range_table = CHARSET_RANGE_TABLE (charset);	\
+									\
+      EXTRACT_NUMBER_AND_INCR (count, range_table);			\
+      CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);	\
+    }									\
+  while (0)
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+   it is doing (if the variable `debug' is nonzero).  If linked with the
+   main program in `iregex.c', you can enter patterns and strings
+   interactively.  And if linked with the main program in `main.c' and
+   the other test files, you can run the already-written tests.	 */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging.  */
+#include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging.  */
+#include <assert.h>
+
+/* Nonzero enables the output of the DEBUG_PRINT* macros below.  */
+static int debug = 0;
+
+/* DEBUG_STATEMENT expands to its argument unchanged.  The DEBUG_PRINT*
+   macros call printf (or the named print helper) only when `debug' is
+   nonzero.  Each of them expands to a bare `if' statement, so take care
+   when using one directly before an `else'.  */
+#define DEBUG_STATEMENT(e) e
+#define DEBUG_PRINT1(x) if (debug) printf (x)
+#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+/* The P argument is accepted but not used in the expansion.  */
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
+  if (debug) print_partial_compiled_pattern (s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
+  if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+/* Print the fastmap in human-readable form.  FASTMAP is indexed by
+   byte value (0 up to 1 << BYTEWIDTH).  Each maximal run of nonzero
+   entries is written as its first character; runs longer than one
+   entry are followed by `-' and the last character of the run.  A
+   newline ends the output.  */
+
+void
+print_fastmap (fastmap)
+    char *fastmap;
+{
+  unsigned limit = 1 << BYTEWIDTH;
+  unsigned pos;
+
+  for (pos = 0; pos < limit; )
+    {
+      unsigned first = pos++;
+
+      if (!fastmap[first])
+	continue;
+
+      putchar (first);
+
+      /* Swallow the rest of the run that starts at FIRST.  */
+      while (pos < limit && fastmap[pos])
+	pos++;
+
+      if (pos - first > 1)
+	{
+	  printf ("-");
+	  putchar (pos - 1);
+	}
+    }
+  putchar ('\n');
+}
+
+
+/* Print a compiled pattern string in human-readable form, starting at
+   the START pointer into it and ending just before the pointer END.
+
+   Each opcode is written on its own line, prefixed by its byte offset
+   from START; a final line reports the offset at which decoding
+   stopped.  If START is NULL, "(null)" is printed instead.  Opcodes
+   that have no case below are printed as `?' followed by their numeric
+   value.
+
+   Byte offsets are pointer differences; they are converted to int
+   before being handed to the `%d' conversions.  */
+
+void
+print_partial_compiled_pattern (start, end)
+    unsigned char *start;
+    unsigned char *end;
+{
+  int mcnt, mcnt2;
+  unsigned char *p = start;
+  unsigned char *pend = end;
+
+  if (start == NULL)
+    {
+      printf ("(null)\n");
+      return;
+    }
+
+  /* Loop over pattern commands.  */
+  while (p < pend)
+    {
+      printf ("%d:\t", (int) (p - start));
+
+      switch ((re_opcode_t) *p++)
+	{
+	case no_op:
+	  printf ("/no_op");
+	  break;
+
+	case exactn:
+	  /* One count byte, then that many literal bytes.  */
+	  mcnt = *p++;
+	  printf ("/exactn/%d", mcnt);
+	  do
+	    {
+	      putchar ('/');
+	      putchar (*p++);
+	    }
+	  while (--mcnt);
+	  break;
+
+	case start_memory:
+	  mcnt = *p++;
+	  printf ("/start_memory/%d/%d", mcnt, *p++);
+	  break;
+
+	case stop_memory:
+	  mcnt = *p++;
+	  printf ("/stop_memory/%d/%d", mcnt, *p++);
+	  break;
+
+	case duplicate:
+	  printf ("/duplicate/%d", *p++);
+	  break;
+
+	case anychar:
+	  printf ("/anychar");
+	  break;
+
+	case charset:
+	case charset_not:
+	  {
+	    register int c, last = -100;
+	    register int in_range = 0;
+
+	    printf ("/charset [%s",
+		    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
+
+	    assert (p + *p < pend);
+
+	    /* *p is the number of bitmap bytes; the bitmap follows it.  */
+	    for (c = 0; c < 256; c++)
+	      if (c / 8 < *p
+		  && (p[1 + (c/8)] & (1 << (c % 8))))
+		{
+		  /* Are we starting a range?  */
+		  if (last + 1 == c && ! in_range)
+		    {
+		      putchar ('-');
+		      in_range = 1;
+		    }
+		  /* Have we broken a range?  */
+		  else if (last + 1 != c && in_range)
+		    {
+		      putchar (last);
+		      in_range = 0;
+		    }
+
+		  if (! in_range)
+		    putchar (c);
+
+		  last = c;
+		}
+
+	    if (in_range)
+	      putchar (last);
+
+	    putchar (']');
+
+	    p += 1 + *p;
+	  }
+	  break;
+
+	case begline:
+	  printf ("/begline");
+	  break;
+
+	case endline:
+	  printf ("/endline");
+	  break;
+
+	case on_failure_jump:
+	  extract_number_and_incr (&mcnt, &p);
+	  printf ("/on_failure_jump to %d", (int) (p + mcnt - start));
+	  break;
+
+	case on_failure_keep_string_jump:
+	  extract_number_and_incr (&mcnt, &p);
+	  printf ("/on_failure_keep_string_jump to %d",
+		  (int) (p + mcnt - start));
+	  break;
+
+	case dummy_failure_jump:
+	  extract_number_and_incr (&mcnt, &p);
+	  printf ("/dummy_failure_jump to %d", (int) (p + mcnt - start));
+	  break;
+
+	case push_dummy_failure:
+	  printf ("/push_dummy_failure");
+	  break;
+
+	case maybe_pop_jump:
+	  extract_number_and_incr (&mcnt, &p);
+	  printf ("/maybe_pop_jump to %d", (int) (p + mcnt - start));
+	  break;
+
+	case pop_failure_jump:
+	  extract_number_and_incr (&mcnt, &p);
+	  printf ("/pop_failure_jump to %d", (int) (p + mcnt - start));
+	  break;
+
+	case jump_past_alt:
+	  extract_number_and_incr (&mcnt, &p);
+	  printf ("/jump_past_alt to %d", (int) (p + mcnt - start));
+	  break;
+
+	case jump:
+	  extract_number_and_incr (&mcnt, &p);
+	  printf ("/jump to %d", (int) (p + mcnt - start));
+	  break;
+
+	case succeed_n:
+	  extract_number_and_incr (&mcnt, &p);
+	  extract_number_and_incr (&mcnt2, &p);
+	  printf ("/succeed_n to %d, %d times",
+		  (int) (p + mcnt - start), mcnt2);
+	  break;
+
+	case jump_n:
+	  extract_number_and_incr (&mcnt, &p);
+	  extract_number_and_incr (&mcnt2, &p);
+	  printf ("/jump_n to %d, %d times",
+		  (int) (p + mcnt - start), mcnt2);
+	  break;
+
+	case set_number_at:
+	  extract_number_and_incr (&mcnt, &p);
+	  extract_number_and_incr (&mcnt2, &p);
+	  printf ("/set_number_at location %d to %d",
+		  (int) (p + mcnt - start), mcnt2);
+	  break;
+
+	case wordbound:
+	  printf ("/wordbound");
+	  break;
+
+	case notwordbound:
+	  printf ("/notwordbound");
+	  break;
+
+	case wordbeg:
+	  printf ("/wordbeg");
+	  break;
+
+	case wordend:
+	  printf ("/wordend");
+	  /* Without this break, control fell into the next case and a
+	     second, unrelated opcode name was printed.  */
+	  break;
+
+#ifdef emacs
+	case before_dot:
+	  printf ("/before_dot");
+	  break;
+
+	case at_dot:
+	  printf ("/at_dot");
+	  break;
+
+	case after_dot:
+	  printf ("/after_dot");
+	  break;
+
+	case syntaxspec:
+	  printf ("/syntaxspec");
+	  mcnt = *p++;
+	  printf ("/%d", mcnt);
+	  break;
+
+	case notsyntaxspec:
+	  printf ("/notsyntaxspec");
+	  mcnt = *p++;
+	  printf ("/%d", mcnt);
+	  break;
+#endif /* emacs */
+
+	case wordchar:
+	  printf ("/wordchar");
+	  break;
+
+	case notwordchar:
+	  printf ("/notwordchar");
+	  break;
+
+	case begbuf:
+	  printf ("/begbuf");
+	  break;
+
+	case endbuf:
+	  printf ("/endbuf");
+	  break;
+
+	default:
+	  printf ("?%d", *(p-1));
+	  break;
+	}
+
+      putchar ('\n');
+    }
+
+  printf ("%d:\tend of pattern.\n", (int) (p - start));
+}
+
+
+/* Dump the pattern buffer BUFP: the decoded opcodes, the used and
+   allocated byte counts, the fastmap (only when it is both present and
+   marked accurate), and then the remaining bookkeeping fields.  */
+void
+print_compiled_pattern (bufp)
+    struct re_pattern_buffer *bufp;
+{
+  unsigned char *code_start = bufp->buffer;
+  unsigned char *code_end = code_start + bufp->used;
+
+  print_partial_compiled_pattern (code_start, code_end);
+  printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
+
+  if (bufp->fastmap_accurate && bufp->fastmap)
+    {
+      printf ("fastmap: ");
+      print_fastmap (bufp->fastmap);
+    }
+
+  /* First line of fields.  */
+  printf ("re_nsub: %d\t", bufp->re_nsub);
+  printf ("regs_alloc: %d\t", bufp->regs_allocated);
+  printf ("can_be_null: %d\t", bufp->can_be_null);
+  printf ("newline_anchor: %d\n", bufp->newline_anchor);
+
+  /* Second line of fields.  */
+  printf ("no_sub: %d\t", bufp->no_sub);
+  printf ("not_bol: %d\t", bufp->not_bol);
+  printf ("not_eol: %d\t", bufp->not_eol);
+  printf ("syntax: %d\n", bufp->syntax);
+  /* The translate table is not printed.  */
+}
+
+
+/* Print the text from position WHERE to the end of the two-part string
+   STRING1 (SIZE1 bytes) followed by STRING2 (SIZE2 bytes).  When WHERE
+   satisfies FIRST_STRING_P, the tail of STRING1 is printed and then all
+   of STRING2; otherwise WHERE is taken as a position in STRING2.
+   A NULL WHERE prints "(null)".  */
+void
+print_double_string (where, string1, size1, string2, size2)
+    const char *where;
+    const char *string1;
+    const char *string2;
+    int size1;
+    int size2;
+{
+  unsigned idx;
+
+  if (where == NULL)
+    {
+      printf ("(null)");
+      return;
+    }
+
+  if (FIRST_STRING_P (where))
+    {
+      idx = where - string1;
+      while (idx < size1)
+	putchar (string1[idx++]);
+
+      /* Continue from the very start of the second string.  */
+      where = string2;
+    }
+
+  idx = where - string2;
+  while (idx < size2)
+    putchar (string2[idx++]);
+}
+
+#else /* not DEBUG */
+
+/* Without DEBUG, assert and every debugging macro expand to nothing,
+   so their arguments are never evaluated.  */
+#undef assert
+#define assert(e)
+
+#define DEBUG_STATEMENT(e)
+#define DEBUG_PRINT1(x)
+#define DEBUG_PRINT2(x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
+   also be assigned to arbitrarily: each pattern buffer stores its own
+   syntax, so it can be changed between regex compilations.  */
+/* This has no initializer because initialized variables in Emacs
+   become read-only after dumping.  */
+reg_syntax_t re_syntax_options;
+
+
+/* Select the regexp syntax used by subsequent compilations.  This
+   provides compatibility with various utilities which historically
+   have different, incompatible syntaxes.
+
+   SYNTAX is a bit mask built from the bits defined in regex.h.  It is
+   stored in `re_syntax_options'; the value that was stored there
+   before the call is returned.  */
+
+reg_syntax_t
+re_set_syntax (syntax)
+    reg_syntax_t syntax;
+{
+  reg_syntax_t previous;
+
+  previous = re_syntax_options;
+  re_syntax_options = syntax;
+
+  return previous;
+}
+
+/* This table gives an error message for each of the error codes listed
+   in regex.h.  Obviously the order here has to be the same as there,
+   because the table is positional: entry N is the message for the Nth
+   code named in the trailing comments below.
+   POSIX doesn't require that we do anything for REG_NOERROR,
+   but why not be nice?  */
+
+static const char *re_error_msgid[] =
+  {
+    gettext_noop ("Success"),	/* REG_NOERROR */
+    gettext_noop ("No match"),	/* REG_NOMATCH */
+    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
+    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
+    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
+    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
+    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
+    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
+    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
+    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
+    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
+    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
+    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
+    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
+    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
+    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
+    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
+  };
+
+/* Avoiding alloca during matching, to placate r_alloc.	 */
+
+/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
+   searching and matching functions should not call alloca.  On some
+   systems, alloca is implemented in terms of malloc, and if we're
+   using the relocating allocator routines, then malloc could cause a
+   relocation, which might (if the strings being searched are in the
+   ralloc heap) shift the data out from underneath the regexp
+   routines.
+
+   Here's another reason to avoid allocation: Emacs
+   processes input from X in a signal handler; processing X input may
+   call malloc; if input arrives while a matching routine is calling
+   malloc, then we're scrod.  But Emacs can't just block input while
+   calling matching routines; then we don't notice interrupts when
+   they come in.  So, Emacs blocks input around all regexp calls
+   except the matching calls, which it leaves unprotected, in the
+   faith that they will not malloc.  */
+
+/* Normally, this is fine.  */
+#define MATCH_MAY_ALLOCATE
+
+/* When using GNU C, we are not REALLY using the C alloca, no matter
+   what config.h may say.  So don't take precautions for it.  */
+#ifdef __GNUC__
+#undef C_ALLOCA
+#endif
+
+/* The match routines may not allocate if (1) they would do it with malloc
+   and (2) it's not safe for them to use malloc.
+   Note that if REL_ALLOC is defined, matching would not use malloc for the
+   failure stack, but we would still use it for the register vectors;
+   so REL_ALLOC should not affect this.	 */
+#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
+#undef MATCH_MAY_ALLOCATE
+#endif
+
+
+/* Failure stack declarations and macros; both re_compile_fastmap and
+   re_match_2 use a failure stack.  These have to be macros because of
+   REGEX_ALLOCATE_STACK.  */
+
+
+/* Approximate number of failure points for which to initially allocate space
+   when matching.  If this number is exceeded, we allocate more
+   space, so it is not a hard limit.  */
+#ifndef INIT_FAILURE_ALLOC
+#define INIT_FAILURE_ALLOC 20
+#endif
+
+/* Roughly the maximum number of failure points on the stack.  Would be
+   exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
+   This is a variable only so users of regex can assign to it; we never
+   change it ourselves.	 */
+#if defined (MATCH_MAY_ALLOCATE)
+/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
+   whose default stack limit is 2mb.  In order for a larger
+   value to work reliably, you have to try to make it accord
+   with the process stack limit.  */
+int re_max_failures = 40000;
+#else
+int re_max_failures = 4000;
+#endif
+
+/* One slot of the failure stack: either a pointer or an integer,
+   depending on which PUSH_FAILURE_* macro stored it.  */
+union fail_stack_elt
+{
+  unsigned char *pointer;
+  int integer;
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
+
+/* The failure stack itself: an array of slots plus its bookkeeping.  */
+typedef struct
+{
+  fail_stack_elt_t *stack;
+  unsigned size;
+  unsigned avail;			/* Offset of next open position.  */
+} fail_stack_type;
+
+/* Emptiness/fullness tests.  These take no argument: they refer to a
+   variable named `fail_stack' (or a pointer named `fail_stack_ptr')
+   in the scope where they are expanded.  */
+#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
+
+
+/* Define macros to initialize and free the failure stack.
+   Do `return -2' if the alloc fails, so INIT_FAIL_STACK may only be
+   used inside a function whose return type can carry that value.
+
+   With MATCH_MAY_ALLOCATE, the initial request is for
+   INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE elements, while the `size'
+   field is recorded as INIT_FAILURE_ALLOC.  Without it,
+   INIT_FAIL_STACK only resets `avail' and RESET_FAIL_STACK expands to
+   nothing.  Both macros operate on a variable named `fail_stack'.  */
+
+#ifdef MATCH_MAY_ALLOCATE
+#define INIT_FAIL_STACK()						\
+  do {									\
+    fail_stack.stack = (fail_stack_elt_t *)				\
+      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE	\
+			    * sizeof (fail_stack_elt_t));		\
+									\
+    if (fail_stack.stack == NULL)					\
+      return -2;							\
+									\
+    fail_stack.size = INIT_FAILURE_ALLOC;				\
+    fail_stack.avail = 0;						\
+  } while (0)
+
+#define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
+#else
+#define INIT_FAIL_STACK()						\
+  do {									\
+    fail_stack.avail = 0;						\
+  } while (0)
+
+#define RESET_FAIL_STACK()
+#endif
+
+
+/* Grow FAIL_STACK by FAIL_STACK_GROWTH_FACTOR, up to a limit
+   which allows approximately `re_max_failures' items.
+
+   Return 1 if succeeds, and 0 if either ran out of memory
+   allocating space for it or it was already too large.
+
+   The reallocation result is stored into (FAIL_STACK).stack before it
+   is tested, so after an allocation failure that field is NULL.
+
+   REGEX_REALLOCATE_STACK requires `destination' be declared.  */
+
+/* Factor to increase the failure stack size by
+   when we increase it.
+   This used to be 2, but 2 was too wasteful
+   because the old discarded stacks added up to as much space
+   as the ultimate, maximum-size stack.  */
+#define FAIL_STACK_GROWTH_FACTOR 4
+
+#define GROW_FAIL_STACK(fail_stack)					\
+  (((fail_stack).size * sizeof (fail_stack_elt_t)			\
+    >= re_max_failures * TYPICAL_FAILURE_SIZE)				\
+   ? 0									\
+   : ((fail_stack).stack						\
+      = (fail_stack_elt_t *)						\
+	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
+	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
+	  MIN (re_max_failures * TYPICAL_FAILURE_SIZE,			\
+	       ((fail_stack).size * sizeof (fail_stack_elt_t)		\
+		* FAIL_STACK_GROWTH_FACTOR))),				\
+									\
+      (fail_stack).stack == NULL					\
+      ? 0								\
+      : ((fail_stack).size						\
+	 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,		\
+		 ((fail_stack).size * sizeof (fail_stack_elt_t)		\
+		  * FAIL_STACK_GROWTH_FACTOR))				\
+	    / sizeof (fail_stack_elt_t)),				\
+	 1)))
+
+
+/* Push pointer POINTER on FAIL_STACK.
+   Return 1 if was able to do so and 0 if ran out of memory allocating
+   space to do so.  Note that the fullness test goes through
+   FAIL_STACK_FULL, which looks at the variable named `fail_stack'
+   rather than at the FAIL_STACK argument.  */
+#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
+  ((FAIL_STACK_FULL ()							\
+    && !GROW_FAIL_STACK (FAIL_STACK))					\
+   ? 0									\
+   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
+      1))
+
+/* Push a pointer value onto the failure stack.
+   Assumes the variable `fail_stack'.  Probably should only
+   be called from within `PUSH_FAILURE_POINT'.  No capacity check is
+   made here.  */
+#define PUSH_FAILURE_POINTER(item)					\
+  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
+
+/* This pushes an integer-valued item onto the failure stack.
+   Assumes the variable `fail_stack'.  Probably should only
+   be called from within `PUSH_FAILURE_POINT'.  No capacity check is
+   made here.  */
+#define PUSH_FAILURE_INT(item)					\
+  fail_stack.stack[fail_stack.avail++].integer = (item)
+
+/* Push a fail_stack_elt_t value onto the failure stack.
+   Assumes the variable `fail_stack'.  Probably should only
+   be called from within `PUSH_FAILURE_POINT'.  No capacity check is
+   made here.  */
+#define PUSH_FAILURE_ELT(item)					\
+  fail_stack.stack[fail_stack.avail++] =  (item)
+
+/* These three POP... operations complement the three PUSH... operations.
+   All assume that `fail_stack' is nonempty.  */
+#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
+#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
+#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
+
+/* Used to omit pushing failure point id's when we're not debugging.
+   Under DEBUG these push/pop one integer slot; otherwise they expand
+   to nothing.  */
+#ifdef DEBUG
+#define DEBUG_PUSH PUSH_FAILURE_INT
+#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
+#else
+#define DEBUG_PUSH(item)
+#define DEBUG_POP(item_addr)
+#endif
+
+
+/* Push the information about the state we will need
+   if we ever fail back to it.
+
+   Requires variables fail_stack, regstart, regend, reg_info,
+   lowest_active_reg and highest_active_reg be declared; the debugging
+   output additionally refers to failure_id, nfailure_points_pushed,
+   num_regs_pushed, bufp, pend, string1, size1, string2 and size2.
+   GROW_FAIL_STACK requires `destination' be declared, which this macro
+   does itself.
+
+   Push order: for each register from lowest_active_reg up to
+   highest_active_reg its start, end and info word; then
+   lowest_active_reg, highest_active_reg, PATTERN_PLACE, STRING_PLACE
+   and (under DEBUG only) the failure id.
+
+   Does `return FAILURE_CODE' if runs out of memory.  */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
+  do {									\
+    char *destination;							\
+    /* Must be int, so when we don't save any registers, the arithmetic	\
+       of 0 + -1 isn't done as unsigned.  */				\
+    int this_reg;							\
+									\
+    DEBUG_STATEMENT (failure_id++);					\
+    DEBUG_STATEMENT (nfailure_points_pushed++);				\
+    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
+    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
+    DEBUG_PRINT2 ("			size: %d\n", (fail_stack).size);\
+									\
+    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
+    DEBUG_PRINT2 ("	available: %d\n", REMAINING_AVAIL_SLOTS);	\
+									\
+    /* Ensure we have enough space allocated for what we will push.  */	\
+    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
+      {									\
+	if (!GROW_FAIL_STACK (fail_stack))				\
+	  return failure_code;						\
+									\
+	DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
+		       (fail_stack).size);				\
+	DEBUG_PRINT2 ("	 slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+      }									\
+									\
+    /* Push the info, starting with the registers.  */			\
+    DEBUG_PRINT1 ("\n");						\
+									\
+    if (1)								\
+      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+	   this_reg++)							\
+	{								\
+	  DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);		\
+	  DEBUG_STATEMENT (num_regs_pushed++);				\
+									\
+	  DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);	\
+	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
+									\
+	  DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
+	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
+									\
+	  DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
+	  DEBUG_PRINT2 (" match_null=%d",				\
+			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
+	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
+	  DEBUG_PRINT2 (" matched_something=%d",			\
+			MATCHED_SOMETHING (reg_info[this_reg]));	\
+	  DEBUG_PRINT2 (" ever_matched=%d",				\
+			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
+	  DEBUG_PRINT1 ("\n");						\
+	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
+	}								\
+									\
+    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
+    PUSH_FAILURE_INT (lowest_active_reg);				\
+									\
+    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
+    PUSH_FAILURE_INT (highest_active_reg);				\
+									\
+    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
+    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
+    PUSH_FAILURE_POINTER (pattern_place);				\
+									\
+    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
+    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,	\
+				 size2);				\
+    DEBUG_PRINT1 ("'\n");						\
+    PUSH_FAILURE_POINTER (string_place);				\
+									\
+    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
+    DEBUG_PUSH (failure_id);						\
+  } while (0)
+
+/* This is the number of items that are pushed and popped on the stack
+   for each register (its start, its end, and its info word).  */
+#define NUM_REG_ITEMS  3
+
+/* Individual items aside from the registers.  */
+#ifdef DEBUG
+#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
+#else
+#define NUM_NONREG_ITEMS 4
+#endif
+
+/* Estimate the size of data pushed by a typical failure stack entry.
+   An estimate is all we need, because all we use this for
+   is to choose a limit for how big to make the failure stack.  */
+
+#define TYPICAL_FAILURE_SIZE 20
+
+/* This is how many items we actually use for a failure point.
+   It depends on the regexp.  The `0 ? 0 : ...' conditional always
+   selects its second branch, so the register count is always
+   highest_active_reg - lowest_active_reg + 1.  Reads the variables
+   `highest_active_reg' and `lowest_active_reg'.  */
+#define NUM_FAILURE_ITEMS				\
+  (((0							\
+     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
+    * NUM_REG_ITEMS)					\
+   + NUM_NONREG_ITEMS)
+
+/* How many items can still be added to the stack without overflowing it.
+   Reads the variable `fail_stack'.  */
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAILURE_POINT pushes.
+
+   We restore into the parameters, all of which should be lvalues:
+     STR -- the saved data position.
+     PAT -- the saved pattern position.
+     LOW_REG, HIGH_REG -- the lowest and highest active registers.
+     REGSTART, REGEND -- arrays of string positions.
+     REG_INFO -- array of information about each subexpression.
+
+   Also assumes the variables `fail_stack' and (if debugging), `bufp',
+   `pend', `string1', `size1', `string2', and `size2'.  */
+
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{									\
+  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
+  int this_reg;								\
+  const unsigned char *string_temp;					\
+									\
+  assert (!FAIL_STACK_EMPTY ());					\
+									\
+  /* Remove failure points and point to how many regs pushed.  */	\
+  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
+  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
+  DEBUG_PRINT2 ("		     size: %d\n", fail_stack.size);	\
+									\
+  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
+									\
+  DEBUG_POP (&failure_id);						\
+  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
+									\
+  /* If the saved string location is NULL, it came from an		\
+     on_failure_keep_string_jump opcode, and we want to throw away the	\
+     saved NULL, thus retaining our current position in the string.  */	\
+  string_temp = POP_FAILURE_POINTER ();					\
+  if (string_temp != NULL)						\
+    str = (const char *) string_temp;					\
+									\
+  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
+  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
+  DEBUG_PRINT1 ("'\n");							\
+									\
+  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
+  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
+  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
+									\
+  /* Restore register info.  */						\
+  high_reg = (unsigned) POP_FAILURE_INT ();				\
+  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
+									\
+  low_reg = (unsigned) POP_FAILURE_INT ();				\
+  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
+									\
+  if (1)								\
+    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
+      {									\
+	DEBUG_PRINT2 ("	   Popping reg: %d\n", this_reg);		\
+									\
+	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
+	DEBUG_PRINT2 ("	     info: 0x%x\n", reg_info[this_reg]);	\
+									\
+	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
+	DEBUG_PRINT2 ("	     end: 0x%x\n", regend[this_reg]);		\
+									\
+	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
+	DEBUG_PRINT2 ("	     start: 0x%x\n", regstart[this_reg]);	\
+      }									\
+  else									\
+    {									\
+      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
+	{								\
+	  reg_info[this_reg].word.integer = 0;				\
+	  regend[this_reg] = 0;						\
+	  regstart[this_reg] = 0;					\
+	}								\
+      highest_active_reg = high_reg;					\
+    }									\
+									\
+  set_regs_matched_done = 0;						\
+  DEBUG_STATEMENT (nfailure_points_popped++);				\
+} /* POP_FAILURE_POINT */
+
+
+
+/* Structure for per-register (a.k.a. per-group) information.
+   Other register information, such as the
+   starting and ending positions (which are addresses), and the list of
+   inner groups (which is a bits list) are maintained in separate
+   variables.
+
+   We are making a (strictly speaking) nonportable assumption here: that
+   the compiler will pack our bit fields into something that fits into
+   the type of `word', i.e., is something that fits into one item on the
+   failure stack.
+
+   The union lets the same storage be saved and restored as a single
+   `fail_stack_elt_t' through `word' while the flags are read and
+   written through `bits'.  The REG_MATCH_NULL_STRING_P, IS_ACTIVE,
+   MATCHED_SOMETHING and EVER_MATCHED_SOMETHING macros defined after
+   the type each expand to one of the bit fields of R and so can be
+   used as lvalues.  */
+
+typedef union
+{
+  fail_stack_elt_t word;
+  struct
+  {
+      /* This field is one if this group can match the empty string,
+	 zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
+#define MATCH_NULL_UNSET_VALUE 3
+    unsigned match_null_string_p : 2;
+    unsigned is_active : 1;
+    unsigned matched_something : 1;
+    unsigned ever_matched_something : 1;
+  } bits;
+} register_info_type;
+
+#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
+#define IS_ACTIVE(R)  ((R).bits.is_active)
+#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
+#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
+
+
+/* Call this when have matched a real character; it sets `matched' flags
+   for the subexpressions which we are currently inside.  Also records
+   that those subexprs have matched.
+
+   Both flags are set for every register from `lowest_active_reg'
+   through `highest_active_reg'.  The work is skipped while
+   `set_regs_matched_done' is nonzero; this macro sets that flag and
+   POP_FAILURE_POINT clears it again.  */
+#define SET_REGS_MATCHED()						\
+  do									\
+    {									\
+      if (!set_regs_matched_done)					\
+	{								\
+	  unsigned r;							\
+	  set_regs_matched_done = 1;					\
+	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
+	    {								\
+	      MATCHED_SOMETHING (reg_info[r])				\
+		= EVER_MATCHED_SOMETHING (reg_info[r])			\
+		= 1;							\
+	    }								\
+	}								\
+    }									\
+  while (0)
+
+/* Registers are set to a sentinel when they haven't yet matched.  The
+   sentinel is the address of the file-local `reg_unset_dummy', so
+   REG_UNSET is a plain pointer comparison against that address.  */
+static char reg_unset_dummy;
+#define REG_UNSET_VALUE (&reg_unset_dummy)
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+/* Subroutine declarations and macros for regex_compile.  */
+
+static void store_op1 (), store_op2 ();
+static void insert_op1 (), insert_op2 ();
+static boolean at_begline_loc_p (), at_endline_loc_p ();
+static boolean group_in_compile_stack ();
+static reg_errcode_t compile_range ();
+
+/* Fetch the next character in the uncompiled pattern---translating it
+   if necessary.  Also cast from a signed character in the constant
+   string passed to us by the user to an unsigned char that we can use
+   as an array index (in, e.g., `translate').
+
+   Uses the variables `p', `pend' and `translate' of the enclosing
+   function.  If the pattern is already exhausted, the enclosing
+   function returns REG_EEND directly, without going through
+   FREE_STACK_RETURN.  The definition is skipped if PATFETCH is
+   already defined.  */
+#ifndef PATFETCH
+#define PATFETCH(c)							\
+  do {if (p == pend) return REG_EEND;					\
+    c = (unsigned char) *p++;						\
+    if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c);	\
+  } while (0)
+#endif
+
+/* Fetch the next character in the uncompiled pattern, with no
+   translation.	 Like PATFETCH, this makes the enclosing function
+   return REG_EEND when `p' has already reached `pend'.  */
+#define PATFETCH_RAW(c)							\
+  do {if (p == pend) return REG_EEND;					\
+    c = (unsigned char) *p++;						\
+  } while (0)
+
+/* Go backwards one character in the pattern.  Nothing here checks that
+   `p' stays at or after the start of the pattern; that is up to the
+   caller.  */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D.  We
+   cast the subscript to translate because some data is declared as
+   `char *', to avoid warnings when a string constant is passed.  But
+   when we use a character as a subscript we must make it unsigned.
+
+   Uses the variable `translate' of the enclosing function.  The
+   definition is skipped if TRANSLATE is already defined.  */
+#ifndef TRANSLATE
+#define TRANSLATE(d) \
+  (RE_TRANSLATE_P (translate) \
+   ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
+#endif
+
+
+/* Macros for outputting the compiled pattern into `buffer'.  */
+
+/* If the buffer isn't allocated when it comes in, use this.  */
+#define INIT_BUF_SIZE  32
+
+/* Make sure we have at least N more bytes of space in buffer.	Expands
+   to a bare `while' statement that keeps invoking EXTEND_BUFFER until
+   the bytes already written (`b - bufp->buffer') plus N fit within
+   `bufp->allocated'; EXTEND_BUFFER may make the enclosing function
+   return.  */
+#define GET_BUFFER_SPACE(n)						\
+    while (b - bufp->buffer + (n) > bufp->allocated)			\
+      EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it.
+   C is converted to `unsigned char' and stored through `b', which is
+   then advanced by one.  */
+#define BUF_PUSH(c)							\
+  do {									\
+    GET_BUFFER_SPACE (1);						\
+    *b++ = (unsigned char) (c);						\
+  } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2.
+   Space for both bytes is reserved in a single GET_BUFFER_SPACE call
+   before either argument is stored.  */
+#define BUF_PUSH_2(c1, c2)						\
+  do {									\
+    GET_BUFFER_SPACE (2);						\
+    *b++ = (unsigned char) (c1);					\
+    *b++ = (unsigned char) (c2);					\
+  } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes.	The bytes are stored in
+   the order C1, C2, C3.  */
+#define BUF_PUSH_3(c1, c2, c3)						\
+  do {									\
+    GET_BUFFER_SPACE (3);						\
+    *b++ = (unsigned char) (c1);					\
+    *b++ = (unsigned char) (c2);					\
+    *b++ = (unsigned char) (c3);					\
+  } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO.  We store a
+   relative address offset by the three bytes the jump itself occupies.
+   That is, the displacement handed to store_op1 is TO - LOC - 3.  */
+#define STORE_JUMP(op, loc, to) \
+  store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump.  ARG is passed through to
+   store_op2 unchanged, after the displacement.  */
+#define STORE_JUMP2(op, loc, to, arg) \
+  store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.
+   `b' is passed to insert_op1 as its last argument; this macro does
+   not itself advance `b'.  */
+#define INSERT_JUMP(op, loc, to) \
+  insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.
+   As with INSERT_JUMP, `b' is only passed along, not advanced.  */
+#define INSERT_JUMP2(op, loc, to, arg) \
+  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+   into the pattern are two bytes long.	 So if 2^16 bytes turns out to
+   be too small, many things would have to change.  */
+#define MAX_BUF_SIZE (1L << 16)
+
+
+/* Grow the pattern buffer to twice its current size (capped at
+   MAX_BUF_SIZE) via realloc and reset the pointers that pointed into
+   the old block to point to the correct places in the new one.
+
+   Makes the enclosing function return REG_ESIZE if the buffer is
+   already MAX_BUF_SIZE bytes, or REG_ESPACE if realloc fails.  The
+   result of realloc is held in a temporary until it is known to have
+   succeeded, so on failure `bufp->buffer' and `bufp->allocated' still
+   describe the old, still-valid block and it can be freed later
+   instead of being leaked.
+
+   Uses the variables `bufp', `b', `begalt', `fixup_alt_jump',
+   `laststart' and `pending_exact' of the enclosing function.  */
+#define EXTEND_BUFFER()							\
+  do {									\
+    unsigned char *old_buffer = bufp->buffer;				\
+    unsigned char *new_buffer;						\
+    unsigned long new_allocated;					\
+    if (bufp->allocated == MAX_BUF_SIZE)				\
+      return REG_ESIZE;							\
+    new_allocated = (unsigned long) bufp->allocated << 1;		\
+    if (new_allocated > MAX_BUF_SIZE)					\
+      new_allocated = MAX_BUF_SIZE;					\
+    new_buffer = (unsigned char *) realloc (old_buffer, new_allocated);	\
+    /* Commit nothing until realloc has succeeded.  */			\
+    if (new_buffer == NULL)						\
+      return REG_ESPACE;						\
+    bufp->buffer = new_buffer;						\
+    bufp->allocated = new_allocated;					\
+    /* If the buffer moved, move all the pointers into it.  */		\
+    if (old_buffer != bufp->buffer)					\
+      {									\
+	b = (b - old_buffer) + bufp->buffer;				\
+	begalt = (begalt - old_buffer) + bufp->buffer;			\
+	if (fixup_alt_jump)						\
+	  fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+	if (laststart)							\
+	  laststart = (laststart - old_buffer) + bufp->buffer;		\
+	if (pending_exact)						\
+	  pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
+      }									\
+  } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+   {start,stop}_memory, the maximum number of groups we can report
+   things about is what fits in that byte.  */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers.  We just
+   ignore the excess.  regex_compile keeps its running group count in
+   a variable of this type.  */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack.  */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
+
+   Each `compile_stack_elt_t' below records the state regex_compile
+   saves when it opens a group, as offsets from `bufp->buffer' so the
+   values stay valid if the buffer is moved by realloc.  Note that
+   regex_compile stores `fixup_alt_jump' as the offset plus one, using
+   0 to mean that there was no pending jump.  */
+typedef int pattern_offset_t;
+
+typedef struct
+{
+  pattern_offset_t begalt_offset;
+  pattern_offset_t fixup_alt_jump;
+  pattern_offset_t inner_group_offset;
+  pattern_offset_t laststart_offset;
+  regnum_t regnum;
+} compile_stack_elt_t;
+
+
+typedef struct
+{
+  compile_stack_elt_t *stack;
+  unsigned size;
+  unsigned avail;			/* Offset of next open position.  */
+} compile_stack_type;
+
+
+#define INIT_COMPILE_STACK_SIZE 32
+
+#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
+
+/* The next available element, i.e. the slot at index `avail' of the
+   variable `compile_stack'.  */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Structure to manage work area for range table.  regex_compile zeroes
+   `table' and `allocated' once before compiling and resets `used' with
+   CLEAR_RANGE_TABLE_WORK_USED each time it begins a bracket
+   expression.  */
+struct range_table_work_area
+{
+  int *table;			/* actual work area.  */
+  int allocated;		/* allocated size for work area in bytes.  */
+  int used;			/* actually used size in words.	 */
+};
+
+/* Make sure that WORK_AREA can hold N more multibyte characters.
+
+   When `used' + N entries would not fit in `allocated' bytes, the
+   table is grown by room for 16 more ints (one fixed step, whatever N
+   is).  If the allocation fails, the enclosing function returns
+   REG_ESPACE through FREE_STACK_RETURN.  The new block is held in a
+   temporary until it is known to be non-null, so that on failure
+   WORK_AREA still points at the old block and FREE_STACK_RETURN can
+   free it rather than leak it.  */
+#define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n)			  \
+  do {									  \
+    if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated)  \
+      {									  \
+	int *grown_table_;						  \
+	(work_area).allocated += 16 * sizeof (int);			  \
+	if ((work_area).table)						  \
+	  grown_table_							  \
+	    = (int *) realloc ((work_area).table, (work_area).allocated); \
+	else								  \
+	  grown_table_ = (int *) malloc ((work_area).allocated);	  \
+	if (grown_table_ == 0)						  \
+	  FREE_STACK_RETURN (REG_ESPACE);				  \
+	(work_area).table = grown_table_;				  \
+      }									  \
+  } while (0)
+
+/* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  Room for two
+   more entries is ensured first, then RANGE_START and RANGE_END are
+   appended in that order, advancing `used' by two.  */
+#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)	\
+  do {									\
+    EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2);			\
+    (work_area).table[(work_area).used++] = (range_start);		\
+    (work_area).table[(work_area).used++] = (range_end);		\
+  } while (0)
+
+/* Free allocated memory for WORK_AREA.	 Does nothing when `table' is
+   null; the members of WORK_AREA themselves are not reset.  */
+#define FREE_RANGE_TABLE_WORK_AREA(work_area)	\
+  do {						\
+    if ((work_area).table)			\
+      free ((work_area).table);			\
+  } while (0)
+
+#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0)
+#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
+#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
+
+
+/* Set the bit for character C in a list.  The bitmap is addressed
+   through the variable `b' of the enclosing function, BYTEWIDTH bits
+   per element.  C is evaluated twice, and is parenthesized at both
+   uses so that the `unsigned char' cast applies to the whole argument
+   even when it is a compound expression.  */
+#define SET_LIST_BIT(c)				      \
+  (b[((unsigned char) (c)) / BYTEWIDTH]		      \
+   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
+
+
+/* Get the next unsigned number in the uncompiled pattern.
+
+   Decimal digits are accumulated into NUM, which is first reset to 0
+   if it is negative.  NUM is left untouched when the pattern is
+   already exhausted or the next character is not a digit.  The last
+   character fetched -- a digit if the pattern ended, otherwise the
+   first non-digit -- is left in `c'; it is not unfetched.  Every
+   PATFETCH here is preceded by a `p != pend' test.  There is no
+   check for overflow of NUM * 10.  The expansion is a bare brace
+   block, and NUM is not parenthesized, so pass a plain variable.  */
+#define GET_UNSIGNED_NUMBER(num)					\
+  { if (p != pend)							\
+     {									\
+       PATFETCH (c);							\
+       while (ISDIGIT (c))						\
+	 {								\
+	   if (num < 0)							\
+	      num = 0;							\
+	   num = num * 10 + c - '0';					\
+	   if (p == pend)						\
+	      break;							\
+	   PATFETCH (c);						\
+	 }								\
+       }								\
+    }
+
+#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
+
+/* Nonzero if STRING is one of the twelve character class names known
+   here.  Each candidate is compared with STREQ; the names are tested
+   in alphabetical order.  */
+#define IS_CHAR_CLASS(string)						\
+   (   STREQ (string, "alnum")						\
+    || STREQ (string, "alpha")						\
+    || STREQ (string, "blank")						\
+    || STREQ (string, "cntrl")						\
+    || STREQ (string, "digit")						\
+    || STREQ (string, "graph")						\
+    || STREQ (string, "lower")						\
+    || STREQ (string, "print")						\
+    || STREQ (string, "punct")						\
+    || STREQ (string, "space")						\
+    || STREQ (string, "upper")						\
+    || STREQ (string, "xdigit"))
+
+#ifndef MATCH_MAY_ALLOCATE
+
+/* If we cannot allocate large objects within re_match_2_internal,
+   we make the fail stack and register vectors global.
+   The fail stack, we grow to the maximum size when a regexp
+   is compiled.
+   The register vectors, we adjust in size each time we
+   compile a regexp, according to the number of registers it needs.  */
+
+static fail_stack_type fail_stack;
+
+/* Size with which the following vectors are currently allocated.
+   That is so we can make them bigger as needed,
+   but never make them smaller.	 regex_grow_registers updates this
+   after it has resized all nine vectors declared below.  */
+static int regs_allocated_size;
+
+static const char **	 regstart, **	  regend;
+static const char ** old_regstart, ** old_regend;
+static const char **best_regstart, **best_regend;
+static register_info_type *reg_info;
+static const char **reg_dummy;
+static register_info_type *reg_info_dummy;
+
+/* Make the register vectors big enough for NUM_REGS registers,
+   but don't make them smaller.
+
+   When NUM_REGS exceeds `regs_allocated_size', every register vector
+   is resized with RETALLOC_IF and `regs_allocated_size' becomes
+   NUM_REGS; otherwise nothing happens.  Nothing is returned, so the
+   return type is spelled out as `void' rather than left to default
+   to `int'.
+
+   NOTE(review): the results of the RETALLOC_IF calls are not checked
+   here -- confirm how RETALLOC_IF reports an allocation failure.  */
+
+static void
+regex_grow_registers (num_regs)
+     int num_regs;
+{
+  if (num_regs <= regs_allocated_size)
+    return;
+
+  RETALLOC_IF (regstart,	 num_regs, const char *);
+  RETALLOC_IF (regend,	 num_regs, const char *);
+  RETALLOC_IF (old_regstart, num_regs, const char *);
+  RETALLOC_IF (old_regend,	 num_regs, const char *);
+  RETALLOC_IF (best_regstart, num_regs, const char *);
+  RETALLOC_IF (best_regend,	 num_regs, const char *);
+  RETALLOC_IF (reg_info,	 num_regs, register_info_type);
+  RETALLOC_IF (reg_dummy,	 num_regs, const char *);
+  RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
+
+  regs_allocated_size = num_regs;
+}
+
+#endif /* not MATCH_MAY_ALLOCATE */
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+   Returns one of error codes defined in `regex.h', or zero for success.
+
+   Assumes the `allocated' (and perhaps `buffer') and `translate'
+   fields are set in BUFP on entry.
+
+   If it succeeds, results are put in BUFP (if it returns an error, the
+   contents of BUFP are undefined):
+     `buffer' is the compiled pattern;
+     `syntax' is set to SYNTAX;
+     `used' is set to the length of the compiled pattern;
+     `fastmap_accurate' is zero;
+     `re_nsub' is the number of subexpressions in PATTERN;
+     `not_bol' and `not_eol' are zero;
+
+   The `fastmap' and `newline_anchor' fields are neither
+   examined nor set.  */
+
+/* Return, freeing storage we allocated: the range table work area
+   `range_table_work' and the array `compile_stack.stack', both
+   variables of the enclosing function.  VALUE is what that function
+   then returns.  */
+#define FREE_STACK_RETURN(value)		\
+  do {							\
+    FREE_RANGE_TABLE_WORK_AREA (range_table_work);	\
+    free (compile_stack.stack);				\
+    return value;					\
+  } while (0)
+
+static reg_errcode_t
+regex_compile (pattern, size, syntax, bufp)
+     const char *pattern;
+     int size;
+     reg_syntax_t syntax;
+     struct re_pattern_buffer *bufp;
+{
+  /* We fetch characters from PATTERN here.  Even though PATTERN is
+     `char *' (i.e., signed), we declare these variables as unsigned, so
+     they can be reliably used as array indices.  */
+  register unsigned int c, c1;
+
+  /* A random temporary spot in PATTERN.  */
+  const char *p1;
+
+  /* Points to the end of the buffer, where we should append.  */
+  register unsigned char *b;
+
+  /* Keeps track of unclosed groups.  */
+  compile_stack_type compile_stack;
+
+  /* Points to the current (ending) position in the pattern.  */
+#ifdef AIX
+  /* `const' makes AIX compiler fail.  */
+  char *p = pattern;
+#else
+  const char *p = pattern;
+#endif
+  const char *pend = pattern + size;
+
+  /* How to translate the characters in the pattern.  */
+  RE_TRANSLATE_TYPE translate = bufp->translate;
+
+  /* Address of the count-byte of the most recently inserted `exactn'
+     command.  This makes it possible to tell if a new exact-match
+     character can be added to that command or if the character requires
+     a new `exactn' command.  */
+  unsigned char *pending_exact = 0;
+
+  /* Address of start of the most recently finished expression.
+     This tells, e.g., postfix * where to find the start of its
+     operand.  Reset at the beginning of groups and alternatives.  */
+  unsigned char *laststart = 0;
+
+  /* Address of beginning of regexp, or inside of last group.  */
+  unsigned char *begalt;
+
+  /* Place in the uncompiled pattern (i.e., the {) to
+     which to go back if the interval is invalid.  */
+  const char *beg_interval;
+
+  /* Address of the place where a forward jump should go to the end of
+     the containing expression.	 Each alternative of an `or' -- except the
+     last -- ends with a forward jump of this sort.  */
+  unsigned char *fixup_alt_jump = 0;
+
+  /* Counts open-groups as they are encountered.  Remembered for the
+     matching close-group on the compile stack, so the same register
+     number is put in the stop_memory as the start_memory.  */
+  regnum_t regnum = 0;
+
+  /* Work area for range table of charset.  */
+  struct range_table_work_area range_table_work;
+
+#ifdef DEBUG
+  DEBUG_PRINT1 ("\nCompiling pattern: ");
+  if (debug)
+    {
+      unsigned debug_count;
+
+      for (debug_count = 0; debug_count < size; debug_count++)
+	putchar (pattern[debug_count]);
+      putchar ('\n');
+    }
+#endif /* DEBUG */
+
+  /* Initialize the compile stack.  */
+  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+  if (compile_stack.stack == NULL)
+    return REG_ESPACE;
+
+  compile_stack.size = INIT_COMPILE_STACK_SIZE;
+  compile_stack.avail = 0;
+
+  range_table_work.table = 0;
+  range_table_work.allocated = 0;
+
+  /* Initialize the pattern buffer.  */
+  bufp->syntax = syntax;
+  bufp->fastmap_accurate = 0;
+  bufp->not_bol = bufp->not_eol = 0;
+
+  /* Set `used' to zero, so that if we return an error, the pattern
+     printer (for debugging) will think there's no pattern.  We reset it
+     at the end.  */
+  bufp->used = 0;
+
+  /* Always count groups, whether or not bufp->no_sub is set.  */
+  bufp->re_nsub = 0;
+
+#ifdef emacs
+  /* bufp->multibyte is set before regex_compile is called, so don't alter
+     it. */
+#else  /* not emacs */
+  /* Nothing is recognized as a multibyte character.  */
+  bufp->multibyte = 0;
+#endif
+
+#if !defined (emacs) && !defined (SYNTAX_TABLE)
+  /* Initialize the syntax table.  */
+   init_syntax_once ();
+#endif
+
+  if (bufp->allocated == 0)
+    {
+      if (bufp->buffer)
+	{ /* If zero allocated, but buffer is non-null, try to realloc
+	     enough space.  This loses if buffer's address is bogus, but
+	     that is the user's responsibility.	 */
+	  RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
+	}
+      else
+	{ /* Caller did not allocate a buffer.	Do it for them.	 */
+	  bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
+	}
+      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
+
+      bufp->allocated = INIT_BUF_SIZE;
+    }
+
+  begalt = b = bufp->buffer;
+
+  /* Loop through the uncompiled pattern until we're at the end.  */
+  while (p != pend)
+    {
+      PATFETCH (c);
+
+      switch (c)
+	{
+	case '^':
+	  {
+	    if (   /* If at start of pattern, it's an operator.	 */
+		   p == pattern + 1
+		   /* If context independent, it's an operator.	 */
+		|| syntax & RE_CONTEXT_INDEP_ANCHORS
+		   /* Otherwise, depends on what's come before.	 */
+		|| at_begline_loc_p (pattern, p, syntax))
+	      BUF_PUSH (begline);
+	    else
+	      goto normal_char;
+	  }
+	  break;
+
+
+	case '$':
+	  {
+	    if (   /* If at end of pattern, it's an operator.  */
+		   p == pend
+		   /* If context independent, it's an operator.	 */
+		|| syntax & RE_CONTEXT_INDEP_ANCHORS
+		   /* Otherwise, depends on what's next.  */
+		|| at_endline_loc_p (p, pend, syntax))
+	       BUF_PUSH (endline);
+	     else
+	       goto normal_char;
+	   }
+	   break;
+
+
+	case '+':
+	case '?':
+	  if ((syntax & RE_BK_PLUS_QM)
+	      || (syntax & RE_LIMITED_OPS))
+	    goto normal_char;
+	handle_plus:
+	case '*':
+	  /* If there is no previous pattern... */
+	  if (!laststart)
+	    {
+	      if (syntax & RE_CONTEXT_INVALID_OPS)
+		FREE_STACK_RETURN (REG_BADRPT);
+	      else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+		goto normal_char;
+	    }
+
+	  {
+	    /* Are we optimizing this jump?  */
+	    boolean keep_string_p = false;
+
+	    /* 1 means zero (many) matches is allowed.	*/
+	    char zero_times_ok = 0, many_times_ok = 0;
+
+	    /* If there is a sequence of repetition chars, collapse it
+	       down to just one (the right one).  We can't combine
+	       interval operators with these because of, e.g., `a{2}*',
+	       which should only match an even number of `a's.	*/
+
+	    for (;;)
+	      {
+		zero_times_ok |= c != '+';
+		many_times_ok |= c != '?';
+
+		if (p == pend)
+		  break;
+
+		PATFETCH (c);
+
+		if (c == '*'
+		    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+		  ;
+
+		else if (syntax & RE_BK_PLUS_QM	 &&  c == '\\')
+		  {
+		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+		    PATFETCH (c1);
+		    if (!(c1 == '+' || c1 == '?'))
+		      {
+			PATUNFETCH;
+			PATUNFETCH;
+			break;
+		      }
+
+		    c = c1;
+		  }
+		else
+		  {
+		    PATUNFETCH;
+		    break;
+		  }
+
+		/* If we get here, we found another repeat character.  */
+	       }
+
+	    /* Star, etc. applied to an empty pattern is equivalent
+	       to an empty pattern.  */
+	    if (!laststart)
+	      break;
+
+	    /* Now we know whether or not zero matches is allowed
+	       and also whether or not two or more matches is allowed.	*/
+	    if (many_times_ok)
+	      { /* More than one repetition is allowed, so put in at the
+		   end a backward relative jump from `b' to before the next
+		   jump we're going to put in below (which jumps from
+		   laststart to after this jump).
+
+		   But if we are at the `*' in the exact sequence `.*\n',
+		   insert an unconditional jump backwards to the .,
+		   instead of the beginning of the loop.  This way we only
+		   push a failure point once, instead of every time
+		   through the loop.  */
+		assert (p - 1 > pattern);
+
+		/* Allocate the space for the jump.  */
+		GET_BUFFER_SPACE (3);
+
+		/* We know we are not at the first character of the pattern,
+		   because laststart was nonzero.  And we've already
+		   incremented `p', by the way, to be the character after
+		   the `*'.  Do we have to do something analogous here
+		   for null bytes, because of RE_DOT_NOT_NULL?	*/
+		if (TRANSLATE ((unsigned char)*(p - 2)) == TRANSLATE ('.')
+		    && zero_times_ok
+		    && p < pend
+		    && TRANSLATE ((unsigned char)*p) == TRANSLATE ('\n')
+		    && !(syntax & RE_DOT_NEWLINE))
+		  { /* We have .*\n.  */
+		    STORE_JUMP (jump, b, laststart);
+		    keep_string_p = true;
+		  }
+		else
+		  /* Anything else.  */
+		  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+
+		/* We've added more stuff to the buffer.  */
+		b += 3;
+	      }
+
+	    /* On failure, jump from laststart to b + 3, which will be the
+	       end of the buffer after this jump is inserted.  */
+	    GET_BUFFER_SPACE (3);
+	    INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+				       : on_failure_jump,
+			 laststart, b + 3);
+	    pending_exact = 0;
+	    b += 3;
+
+	    if (!zero_times_ok)
+	      {
+		/* At least one repetition is required, so insert a
+		   `dummy_failure_jump' before the initial
+		   `on_failure_jump' instruction of the loop. This
+		   effects a skip over that instruction the first time
+		   we hit that loop.  */
+		GET_BUFFER_SPACE (3);
+		INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
+		b += 3;
+	      }
+	    }
+	  break;
+
+
+	case '.':
+	  laststart = b;
+	  BUF_PUSH (anychar);
+	  break;
+
+
+	case '[':
+	  {
+	    CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
+
+	    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+	    /* Ensure that we have enough space to push a charset: the
+	       opcode, the length count, and the bitset; 34 bytes in all.  */
+	    GET_BUFFER_SPACE (34);
+
+	    laststart = b;
+
+	    /* We test `*p == '^' twice, instead of using an if
+	       statement, so we only need one BUF_PUSH.	 */
+	    BUF_PUSH (*p == '^' ? charset_not : charset);
+	    if (*p == '^')
+	      p++;
+
+	    /* Remember the first position in the bracket expression.  */
+	    p1 = p;
+
+	    /* Push the number of bytes in the bitmap.	*/
+	    BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+	    /* Clear the whole map.  */
+	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+	    /* charset_not matches newline according to a syntax bit.  */
+	    if ((re_opcode_t) b[-2] == charset_not
+		&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+	      SET_LIST_BIT ('\n');
+
+	    /* Read in characters and ranges, setting map bits.	 */
+	    for (;;)
+	      {
+		int len;
+		boolean escaped_char = false;
+
+		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+		PATFETCH (c);
+
+		/* \ might escape characters inside [...] and [^...].  */
+		if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+		  {
+		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+		    PATFETCH (c);
+		    escaped_char = true;
+		  }
+		else
+		  {
+		    /* Could be the end of the bracket expression.	If it's
+		       not (i.e., when the bracket expression is `[]' so
+		       far), the ']' character bit gets set way below.  */
+		    if (c == ']' && p != p1 + 1)
+		      break;
+		  }
+
+		/* If C indicates start of multibyte char, get the
+		   actual character code in C, and set the pattern
+		   pointer P to the next character boundary.  */
+		if (bufp->multibyte && BASE_LEADING_CODE_P (c))
+		  {
+		    PATUNFETCH;
+		    c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+		    p += len;
+		  }
+		/* What should we do for the character which is
+		   greater than 0x7F, but not BASE_LEADING_CODE_P?
+		   XXX */
+
+		/* See if we're at the beginning of a possible character
+		   class.  */
+
+		else if (!escaped_char &&
+			 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+		  {
+		    /* Leave room for the null.	 */
+		    char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+		    PATFETCH (c);
+		    c1 = 0;
+
+		    /* If pattern is `[[:'.  */
+		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+		    for (;;)
+		      {
+			PATFETCH (c);
+			if (c == ':' || c == ']' || p == pend
+			    || c1 == CHAR_CLASS_MAX_LENGTH)
+			  break;
+			str[c1++] = c;
+		      }
+		    str[c1] = '\0';
+
+		    /* If isn't a word bracketed by `[:' and `:]':
+		       undo the ending character, the letters, and
+		       leave the leading `:' and `[' (but set bits for
+		       them).  */
+		    if (c == ':' && *p == ']')
+		      {
+			int ch;
+			boolean is_alnum = STREQ (str, "alnum");
+			boolean is_alpha = STREQ (str, "alpha");
+			boolean is_blank = STREQ (str, "blank");
+			boolean is_cntrl = STREQ (str, "cntrl");
+			boolean is_digit = STREQ (str, "digit");
+			boolean is_graph = STREQ (str, "graph");
+			boolean is_lower = STREQ (str, "lower");
+			boolean is_print = STREQ (str, "print");
+			boolean is_punct = STREQ (str, "punct");
+			boolean is_space = STREQ (str, "space");
+			boolean is_upper = STREQ (str, "upper");
+			boolean is_xdigit = STREQ (str, "xdigit");
+
+			if (!IS_CHAR_CLASS (str))
+			  FREE_STACK_RETURN (REG_ECTYPE);
+
+			/* Throw away the ] at the end of the character
+			   class.  */
+			PATFETCH (c);
+
+			if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+
+			for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+			  {
+			    int translated = TRANSLATE (ch);
+			    /* This was split into 3 if's to
+			       avoid an arbitrary limit in some compiler.  */
+			    if (   (is_alnum  && ISALNUM (ch))
+				|| (is_alpha  && ISALPHA (ch))
+				|| (is_blank  && ISBLANK (ch))
+				|| (is_cntrl  && ISCNTRL (ch)))
+			      SET_LIST_BIT (translated);
+			    if (   (is_digit  && ISDIGIT (ch))
+				|| (is_graph  && ISGRAPH (ch))
+				|| (is_lower  && ISLOWER (ch))
+				|| (is_print  && ISPRINT (ch)))
+			      SET_LIST_BIT (translated);
+			    if (   (is_punct  && ISPUNCT (ch))
+				|| (is_space  && ISSPACE (ch))
+				|| (is_upper  && ISUPPER (ch))
+				|| (is_xdigit && ISXDIGIT (ch)))
+			      SET_LIST_BIT (translated);
+			  }
+
+			/* Repeat the loop. */
+			continue;
+		      }
+		    else
+		      {
+			c1++;
+			while (c1--)
+			  PATUNFETCH;
+			SET_LIST_BIT ('[');
+
+			/* Because the `:' may start a range, we
+			   can't simply set its bit and repeat the loop.
+			   Instead, just set C to it and handle it below.	*/
+			c = ':';
+		      }
+		  }
+
+		if (p < pend && p[0] == '-' && p[1] != ']')
+		  {
+
+		    /* Discard the `-'. */
+		    PATFETCH (c1);
+
+		    /* Fetch the character which ends the range. */
+		    PATFETCH (c1);
+		    if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
+		      {
+			PATUNFETCH;
+			c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+			p += len;
+		      }
+
+		    if (SINGLE_BYTE_CHAR_P (c)
+			&& ! SINGLE_BYTE_CHAR_P (c1))
+		      {
+			/* Handle a range such as \177-\377 in multibyte mode.
+			   Split that into two ranges,
+			   the low one ending at 0237, and the high one
+			   starting at ...040.  */
+			int c1_base = (c1 & ~0177) | 040;
+			SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
+			c1 = 0237;
+		      }
+		    else if (!SAME_CHARSET_P (c, c1))
+		      FREE_STACK_RETURN (REG_ERANGE);
+		  }
+		else
+		  /* Range from C to C. */
+		  c1 = c;
+
+		/* Set the range ... */
+		if (SINGLE_BYTE_CHAR_P (c))
+		  /* ... into bitmap.  */
+		  {
+		    unsigned this_char;
+		    int range_start = c, range_end = c1;
+
+		    /* If the start is after the end, the range is empty.  */
+		    if (range_start > range_end)
+		      {
+			if (syntax & RE_NO_EMPTY_RANGES)
+			  FREE_STACK_RETURN (REG_ERANGE);
+			/* Else, repeat the loop.  */
+		      }
+		    else
+		      {
+			for (this_char = range_start; this_char <= range_end;
+			     this_char++)
+			  SET_LIST_BIT (TRANSLATE (this_char));
+		      }
+		  }
+		else
+		  /* ... into range table.  */
+		  SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
+	      }
+
+	    /* Discard any (non)matching list bytes that are all 0 at the
+	       end of the map.	Decrease the map-length byte too.  */
+	    while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+	      b[-1]--;
+	    b += b[-1];
+
+	    /* Build real range table from work area. */
+	    if (RANGE_TABLE_WORK_USED (range_table_work))
+	      {
+		int i;
+		int used = RANGE_TABLE_WORK_USED (range_table_work);
+
+		/* Allocate space for COUNT + RANGE_TABLE.  Needs two
+		   bytes for COUNT and three bytes for each character.	*/
+		GET_BUFFER_SPACE (2 + used * 3);
+
+		/* Indicate the existence of range table.  */
+		laststart[1] |= 0x80;
+
+		STORE_NUMBER_AND_INCR (b, used / 2);
+		for (i = 0; i < used; i++)
+		  STORE_CHARACTER_AND_INCR
+		    (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
+	      }
+	  }
+	  break;
+
+
+	case '(':
+	  if (syntax & RE_NO_BK_PARENS)
+	    goto handle_open;
+	  else
+	    goto normal_char;
+
+
+	case ')':
+	  if (syntax & RE_NO_BK_PARENS)
+	    goto handle_close;
+	  else
+	    goto normal_char;
+
+
+	case '\n':
+	  if (syntax & RE_NEWLINE_ALT)
+	    goto handle_alt;
+	  else
+	    goto normal_char;
+
+
+	case '|':
+	  if (syntax & RE_NO_BK_VBAR)
+	    goto handle_alt;
+	  else
+	    goto normal_char;
+
+
+	case '{':
+	   if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
+	     goto handle_interval;
+	   else
+	     goto normal_char;
+
+
+	case '\\':
+	  if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
+
+	  /* Do not translate the character after the \, so that we can
+	     distinguish, e.g., \B from \b, even if we normally would
+	     translate, e.g., B to b.  */
+	  PATFETCH_RAW (c);
+
+	  switch (c)
+	    {
+	    case '(':
+	      if (syntax & RE_NO_BK_PARENS)
+		goto normal_backslash;
+
+	    handle_open:
+	      bufp->re_nsub++;
+	      regnum++;
+
+	      if (COMPILE_STACK_FULL)
+		{
+		  RETALLOC (compile_stack.stack, compile_stack.size << 1,
+			    compile_stack_elt_t);
+		  if (compile_stack.stack == NULL) return REG_ESPACE;
+
+		  compile_stack.size <<= 1;
+		}
+
+	      /* These are the values to restore when we hit end of this
+		 group.	 They are all relative offsets, so that if the
+		 whole pattern moves because of realloc, they will still
+		 be valid.  */
+	      COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
+	      COMPILE_STACK_TOP.fixup_alt_jump
+		= fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+	      COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+	      COMPILE_STACK_TOP.regnum = regnum;
+
+	      /* We will eventually replace the 0 with the number of
+		 groups inner to this one.  But do not push a
+		 start_memory for groups beyond the last one we can
+		 represent in the compiled pattern.  */
+	      if (regnum <= MAX_REGNUM)
+		{
+		  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
+		  BUF_PUSH_3 (start_memory, regnum, 0);
+		}
+
+	      compile_stack.avail++;
+
+	      fixup_alt_jump = 0;
+	      laststart = 0;
+	      begalt = b;
+	      /* If we've reached MAX_REGNUM groups, then this open
+		 won't actually generate any code, so we'll have to
+		 clear pending_exact explicitly.  */
+	      pending_exact = 0;
+	      break;
+
+
+	    case ')':
+	      if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+
+	      if (COMPILE_STACK_EMPTY)
+		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+		  goto normal_backslash;
+		else
+		  FREE_STACK_RETURN (REG_ERPAREN);
+
+	    handle_close:
+	      if (fixup_alt_jump)
+		{ /* Push a dummy failure point at the end of the
+		     alternative for a possible future
+		     `pop_failure_jump' to pop.	 See comments at
+		     `push_dummy_failure' in `re_match_2'.  */
+		  BUF_PUSH (push_dummy_failure);
+
+		  /* We allocated space for this jump when we assigned
+		     to `fixup_alt_jump', in the `handle_alt' case below.  */
+		  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+		}
+
+	      /* See similar code for backslashed left paren above.  */
+	      if (COMPILE_STACK_EMPTY)
+		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+		  goto normal_char;
+		else
+		  FREE_STACK_RETURN (REG_ERPAREN);
+
+	      /* Since we just checked for an empty stack above, this
+		 ``can't happen''.  */
+	      assert (compile_stack.avail != 0);
+	      {
+		/* We don't just want to restore into `regnum', because
+		   later groups should continue to be numbered higher,
+		   as in `(ab)c(de)' -- the second group is #2.	 */
+		regnum_t this_group_regnum;
+
+		compile_stack.avail--;
+		begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
+		fixup_alt_jump
+		  = COMPILE_STACK_TOP.fixup_alt_jump
+		    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
+		    : 0;
+		laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
+		this_group_regnum = COMPILE_STACK_TOP.regnum;
+		/* If we've reached MAX_REGNUM groups, then this open
+		   won't actually generate any code, so we'll have to
+		   clear pending_exact explicitly.  */
+		pending_exact = 0;
+
+		/* We're at the end of the group, so now we know how many
+		   groups were inside this one.	 */
+		if (this_group_regnum <= MAX_REGNUM)
+		  {
+		    unsigned char *inner_group_loc
+		      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
+
+		    *inner_group_loc = regnum - this_group_regnum;
+		    BUF_PUSH_3 (stop_memory, this_group_regnum,
+				regnum - this_group_regnum);
+		  }
+	      }
+	      break;
+
+
+	    case '|':					/* `\|'.  */
+	      if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
+		goto normal_backslash;
+	    handle_alt:
+	      if (syntax & RE_LIMITED_OPS)
+		goto normal_char;
+
+	      /* Insert before the previous alternative a jump which
+		 jumps to this alternative if the former fails.	 */
+	      GET_BUFFER_SPACE (3);
+	      INSERT_JUMP (on_failure_jump, begalt, b + 6);
+	      pending_exact = 0;
+	      b += 3;
+
+	      /* The alternative before this one has a jump after it
+		 which gets executed if it gets matched.  Adjust that
+		 jump so it will jump to this alternative's analogous
+		 jump (put in below, which in turn will jump to the next
+		 (if any) alternative's such jump, etc.).  The last such
+		 jump jumps to the correct final destination.  A picture:
+			  _____ _____
+			  |   | |   |
+			  |   v |   v
+			 a | b	 | c
+
+		 If we are at `b', then fixup_alt_jump right now points to a
+		 three-byte space after `a'.  We'll put in the jump, set
+		 fixup_alt_jump to right after `b', and leave behind three
+		 bytes which we'll fill in when we get to after `c'.  */
+
+	      if (fixup_alt_jump)
+		STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+	      /* Mark and leave space for a jump after this alternative,
+		 to be filled in later either by the next alternative or
+		 when we know we're at the end of a series of alternatives.  */
+	      fixup_alt_jump = b;
+	      GET_BUFFER_SPACE (3);
+	      b += 3;
+
+	      laststart = 0;
+	      begalt = b;
+	      break;
+
+
+	    case '{':
+	      /* If \{ is a literal.  */
+	      if (!(syntax & RE_INTERVALS)
+		     /* If we're at `\{' and it's not the open-interval
+			operator.  */
+		  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+		  || (p - 2 == pattern	&&  p == pend))
+		goto normal_backslash;
+
+	    handle_interval:
+	      {
+		/* If got here, then the syntax allows intervals.  */
+
+		/* At least (most) this many matches must be made.  */
+		int lower_bound = -1, upper_bound = -1;
+
+		beg_interval = p - 1;
+
+		if (p == pend)
+		  {
+		    if (syntax & RE_NO_BK_BRACES)
+		      goto unfetch_interval;
+		    else
+		      FREE_STACK_RETURN (REG_EBRACE);
+		  }
+
+		GET_UNSIGNED_NUMBER (lower_bound);
+
+		if (c == ',')
+		  {
+		    GET_UNSIGNED_NUMBER (upper_bound);
+		    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
+		  }
+		else
+		  /* Interval such as `{1}' => match exactly once. */
+		  upper_bound = lower_bound;
+
+		if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+		    || lower_bound > upper_bound)
+		  {
+		    if (syntax & RE_NO_BK_BRACES)
+		      goto unfetch_interval;
+		    else
+		      FREE_STACK_RETURN (REG_BADBR);
+		  }
+
+		if (!(syntax & RE_NO_BK_BRACES))
+		  {
+		    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
+
+		    PATFETCH (c);
+		  }
+
+		if (c != '}')
+		  {
+		    if (syntax & RE_NO_BK_BRACES)
+		      goto unfetch_interval;
+		    else
+		      FREE_STACK_RETURN (REG_BADBR);
+		  }
+
+		/* We just parsed a valid interval.  */
+
+		/* If it's invalid to have no preceding re.  */
+		if (!laststart)
+		  {
+		    if (syntax & RE_CONTEXT_INVALID_OPS)
+		      FREE_STACK_RETURN (REG_BADRPT);
+		    else if (syntax & RE_CONTEXT_INDEP_OPS)
+		      laststart = b;
+		    else
+		      goto unfetch_interval;
+		  }
+
+		/* If the upper bound is zero, don't want to succeed at
+		   all; jump from `laststart' to `b + 3', which will be
+		   the end of the buffer after we insert the jump.  */
+		 if (upper_bound == 0)
+		   {
+		     GET_BUFFER_SPACE (3);
+		     INSERT_JUMP (jump, laststart, b + 3);
+		     b += 3;
+		   }
+
+		 /* Otherwise, we have a nontrivial interval.  When
+		    we're all done, the pattern will look like:
+		      set_number_at <jump count> <upper bound>
+		      set_number_at <succeed_n count> <lower bound>
+		      succeed_n <after jump addr> <succeed_n count>
+		      <body of loop>
+		      jump_n <succeed_n addr> <jump count>
+		    (The upper bound and `jump_n' are omitted if
+		    `upper_bound' is 1, though.)  */
+		 else
+		   { /* If the upper bound is > 1, we need to insert
+			more at the end of the loop.  */
+		     unsigned nbytes = 10 + (upper_bound > 1) * 10;
+
+		     GET_BUFFER_SPACE (nbytes);
+
+		     /* Initialize lower bound of the `succeed_n', even
+			though it will be set during matching by its
+			attendant `set_number_at' (inserted next),
+			because `re_compile_fastmap' needs to know.
+			Jump to the `jump_n' we might insert below.  */
+		     INSERT_JUMP2 (succeed_n, laststart,
+				   b + 5 + (upper_bound > 1) * 5,
+				   lower_bound);
+		     b += 5;
+
+		     /* Code to initialize the lower bound.  Insert
+			before the `succeed_n'.	 The `5' is the last two
+			bytes of this `set_number_at', plus 3 bytes of
+			the following `succeed_n'.  */
+		     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+		     b += 5;
+
+		     if (upper_bound > 1)
+		       { /* More than one repetition is allowed, so
+			    append a backward jump to the `succeed_n'
+			    that starts this interval.
+
+			    When we've reached this during matching,
+			    we'll have matched the interval once, so
+			    jump back only `upper_bound - 1' times.  */
+			 STORE_JUMP2 (jump_n, b, laststart + 5,
+				      upper_bound - 1);
+			 b += 5;
+
+			 /* The location we want to set is the second
+			    parameter of the `jump_n'; that is `b-2' as
+			    an absolute address.  `laststart' will be
+			    the `set_number_at' we're about to insert;
+			    `laststart+3' the number to set, the source
+			    for the relative address.  But we are
+			    inserting into the middle of the pattern --
+			    so everything is getting moved up by 5.
+			    Conclusion: (b - 2) - (laststart + 3) + 5,
+			    i.e., b - laststart.
+
+			    We insert this at the beginning of the loop
+			    so that if we fail during matching, we'll
+			    reinitialize the bounds.  */
+			 insert_op2 (set_number_at, laststart, b - laststart,
+				     upper_bound - 1, b);
+			 b += 5;
+		       }
+		   }
+		pending_exact = 0;
+		beg_interval = NULL;
+	      }
+	      break;
+
+	    unfetch_interval:
+	      /* If an invalid interval, match the characters as literals.  */
+	       assert (beg_interval);
+	       p = beg_interval;
+	       beg_interval = NULL;
+
+	       /* normal_char and normal_backslash need `c'.  */
+	       PATFETCH (c);
+
+	       if (!(syntax & RE_NO_BK_BRACES))
+		 {
+		   if (p > pattern  &&	p[-1] == '\\')
+		     goto normal_backslash;
+		 }
+	       goto normal_char;
+
+#ifdef emacs
+	    /* There is no way to specify the before_dot and after_dot
+	       operators.  rms says this is ok.	 --karl	 */
+	    case '=':
+	      BUF_PUSH (at_dot);
+	      break;
+
+	    case 's':
+	      laststart = b;
+	      PATFETCH (c);
+	      BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+	      break;
+
+	    case 'S':
+	      laststart = b;
+	      PATFETCH (c);
+	      BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+	      break;
+
+	    case 'c':
+	      laststart = b;
+	      PATFETCH_RAW (c);
+	      BUF_PUSH_2 (categoryspec, c);
+	      break;
+
+	    case 'C':
+	      laststart = b;
+	      PATFETCH_RAW (c);
+	      BUF_PUSH_2 (notcategoryspec, c);
+	      break;
+#endif /* emacs */
+
+
+	    case 'w':
+	      laststart = b;
+	      BUF_PUSH (wordchar);
+	      break;
+
+
+	    case 'W':
+	      laststart = b;
+	      BUF_PUSH (notwordchar);
+	      break;
+
+
+	    case '<':
+	      BUF_PUSH (wordbeg);
+	      break;
+
+	    case '>':
+	      BUF_PUSH (wordend);
+	      break;
+
+	    case 'b':
+	      BUF_PUSH (wordbound);
+	      break;
+
+	    case 'B':
+	      BUF_PUSH (notwordbound);
+	      break;
+
+	    case '`':
+	      BUF_PUSH (begbuf);
+	      break;
+
+	    case '\'':
+	      BUF_PUSH (endbuf);
+	      break;
+
+	    case '1': case '2': case '3': case '4': case '5':
+	    case '6': case '7': case '8': case '9':
+	      if (syntax & RE_NO_BK_REFS)
+		goto normal_char;
+
+	      c1 = c - '0';
+
+	      if (c1 > regnum)
+		FREE_STACK_RETURN (REG_ESUBREG);
+
+	      /* Can't back reference to a subexpression if inside of it.  */
+	      if (group_in_compile_stack (compile_stack, c1))
+		goto normal_char;
+
+	      laststart = b;
+	      BUF_PUSH_2 (duplicate, c1);
+	      break;
+
+
+	    case '+':
+	    case '?':
+	      if (syntax & RE_BK_PLUS_QM)
+		goto handle_plus;
+	      else
+		goto normal_backslash;
+
+	    default:
+	    normal_backslash:
+	      /* You might think it would be useful for \ to mean
+		 not to translate; but if we don't translate it
+		 it will never match anything.	*/
+	      c = TRANSLATE (c);
+	      goto normal_char;
+	    }
+	  break;
+
+
+	default:
+	/* Expects the character in `c'.  */
+	normal_char:
+	  p1 = p - 1;		/* P1 points the head of C.  */
+#ifdef emacs
+	  if (bufp->multibyte)
+	    {
+	      c = STRING_CHAR (p1, pend - p1);
+	      c = TRANSLATE (c);
+	      /* Set P to the next character boundary.  */
+	      p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
+	    }
+#endif
+	      /* If no exactn currently being built.  */
+	  if (!pending_exact
+
+	      /* If last exactn not at current position.  */
+	      || pending_exact + *pending_exact + 1 != b
+
+	      /* We have only one byte following the exactn for the count.  */
+	      || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
+
+	      /* If followed by a repetition operator.	*/
+	      || (p != pend && (*p == '*' || *p == '^'))
+	      || ((syntax & RE_BK_PLUS_QM)
+		  ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
+		  : p != pend && (*p == '+' || *p == '?'))
+	      || ((syntax & RE_INTERVALS)
+		  && ((syntax & RE_NO_BK_BRACES)
+		      ? p != pend && *p == '{'
+		      : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
+	    {
+	      /* Start building a new exactn.  */
+
+	      laststart = b;
+
+	      BUF_PUSH_2 (exactn, 0);
+	      pending_exact = b - 1;
+	    }
+
+#ifdef emacs
+	  if (! SINGLE_BYTE_CHAR_P (c))
+	    {
+	      unsigned char work[4], *str;
+	      int i = CHAR_STRING (c, work, str);
+	      int j;
+	      for (j = 0; j < i; j++)
+		{
+		  BUF_PUSH (str[j]);
+		  (*pending_exact)++;
+		}
+	    }
+	  else
+#endif
+	    {
+	      BUF_PUSH (c);
+	      (*pending_exact)++;
+	    }
+	  break;
+	} /* switch (c) */
+    } /* while p != pend */
+
+
+  /* Through the pattern now.  */
+
+  if (fixup_alt_jump)
+    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+  if (!COMPILE_STACK_EMPTY)
+    FREE_STACK_RETURN (REG_EPAREN);
+
+  /* If we don't want backtracking, force success
+     the first time we reach the end of the compiled pattern.  */
+  if (syntax & RE_NO_POSIX_BACKTRACKING)
+    BUF_PUSH (succeed);
+
+  free (compile_stack.stack);
+
+  /* We have succeeded; set the length of the buffer.  */
+  bufp->used = b - bufp->buffer;
+
+#ifdef DEBUG
+  if (debug)
+    {
+      DEBUG_PRINT1 ("\nCompiled pattern: \n");
+      print_compiled_pattern (bufp);
+    }
+#endif /* DEBUG */
+
+#ifndef MATCH_MAY_ALLOCATE
+  /* Initialize the failure stack to the largest possible stack.  This
+     isn't necessary unless we're trying to avoid calling alloca in
+     the search and match routines.  */
+  {
+    int num_regs = bufp->re_nsub + 1;
+
+    if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
+      {
+	fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
+
+#ifdef emacs
+	if (! fail_stack.stack)
+	  fail_stack.stack
+	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
+					    * sizeof (fail_stack_elt_t));
+	else
+	  fail_stack.stack
+	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
+					     (fail_stack.size
+					      * sizeof (fail_stack_elt_t)));
+#else /* not emacs */
+	if (! fail_stack.stack)
+	  fail_stack.stack
+	    = (fail_stack_elt_t *) malloc (fail_stack.size
+					   * sizeof (fail_stack_elt_t));
+	else
+	  fail_stack.stack
+	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
+					    (fail_stack.size
+					     * sizeof (fail_stack_elt_t)));
+#endif /* not emacs */
+      }
+
+    regex_grow_registers (num_regs);
+  }
+#endif /* not MATCH_MAY_ALLOCATE */
+
+  return REG_NOERROR;
+} /* regex_compile */
+
+/* Subroutines for `regex_compile'.  */
+
+/* Write a three-byte instruction at LOC: the opcode OP goes in the
+   first byte and ARG is stored, via STORE_NUMBER, as the two-byte
+   integer parameter that follows it.  */
+
+static void
+store_op1 (op, loc, arg)
+    re_opcode_t op;
+    unsigned char *loc;
+    int arg;
+{
+  /* Where the integer parameter begins, right after the opcode byte.  */
+  unsigned char *operand = loc + 1;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (operand, arg);
+}
+
+
+/* Two-parameter variant of `store_op1': write OP at LOC, then the
+   two-byte integers ARG1 and ARG2 in the four bytes that follow, for
+   five bytes in all.  */
+
+static void
+store_op2 (op, loc, arg1, arg2)
+    re_opcode_t op;
+    unsigned char *loc;
+    int arg1, arg2;
+{
+  /* Start of each two-byte parameter slot.  */
+  unsigned char *first_operand = loc + 1;
+  unsigned char *second_operand = loc + 3;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (first_operand, arg1);
+  STORE_NUMBER (second_operand, arg2);
+}
+
+
+/* Open a three-byte gap at LOC by moving every byte in [LOC, END)
+   three positions toward higher addresses, then store OP and its
+   two-byte integer parameter ARG in the gap.  No bounds checking is
+   done here: the bytes END through END + 2 are written.  */
+
+static void
+insert_op1 (op, loc, arg, end)
+    re_opcode_t op;
+    unsigned char *loc;
+    int arg;
+    unsigned char *end;
+{
+  register unsigned char *src;
+
+  /* Copy from the top down so that no byte is overwritten before it
+     has been moved.  */
+  for (src = end; src != loc; src--)
+    src[2] = src[-1];
+
+  store_op1 (op, loc, arg);
+}
+
+
+/* Two-parameter variant of `insert_op1': open a five-byte gap at LOC
+   by moving every byte in [LOC, END) five positions toward higher
+   addresses, then store OP followed by the two-byte integers ARG1 and
+   ARG2 in the gap.  No bounds checking is done here: the bytes END
+   through END + 4 are written.  */
+
+static void
+insert_op2 (op, loc, arg1, arg2, end)
+    re_opcode_t op;
+    unsigned char *loc;
+    int arg1, arg2;
+    unsigned char *end;
+{
+  register unsigned char *src;
+
+  /* Copy from the top down so that no byte is overwritten before it
+     has been moved.  */
+  for (src = end; src != loc; src--)
+    src[4] = src[-1];
+
+  store_op2 (op, loc, arg1, arg2);
+}
+
+
+/* P points just past a `^' in PATTERN.  Return true if the character
+   in front of that `^' acts as an open-group or an alternation
+   operator under SYNTAX.  P[-2] is read unconditionally, so there must
+   be at least one character before the `^'.  */
+
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+    const char *pattern, *p;
+    reg_syntax_t syntax;
+{
+  /* The character immediately before the `^'.  */
+  const char *before = p - 2;
+  /* True when that character is itself preceded by a backslash.  */
+  boolean escaped = before > pattern && before[-1] == '\\';
+
+  switch (*before)
+    {
+    case '(':
+      /* After a subexpression?  */
+      return (syntax & RE_NO_BK_PARENS) || escaped;
+
+    case '|':
+      /* After an alternative?  */
+      return (syntax & RE_NO_BK_VBAR) || escaped;
+
+    default:
+      return false;
+    }
+}
+
+
+/* Counterpart of at_begline_loc_p for `$'.  P points at the character
+   following the `$' and PEND at the end of the pattern.  Return true
+   if what follows is a close-group or an alternation operator under
+   SYNTAX.  *P is read unconditionally, so `P < PEND' must hold.  */
+
+static boolean
+at_endline_loc_p (p, pend, syntax)
+    const char *p, *pend;
+    int syntax;
+{
+  boolean before_close, before_alt;
+
+  /* Before a subexpression?  Either a bare `)' or a backslashed one,
+     depending on the syntax; the escaped form needs a second
+     character inside the pattern.  */
+  if (syntax & RE_NO_BK_PARENS)
+    before_close = *p == ')';
+  else
+    before_close = *p == '\\' && p + 1 < pend && p[1] == ')';
+
+  if (before_close)
+    return true;
+
+  /* Before an alternative?  Same scheme for `|'.  */
+  if (syntax & RE_NO_BK_VBAR)
+    before_alt = *p == '|';
+  else
+    before_alt = *p == '\\' && p + 1 < pend && p[1] == '|';
+
+  return before_alt;
+}
+
+
+/* Search the first COMPILE_STACK.avail entries of COMPILE_STACK, from
+   the most recently pushed downward, for one whose register number is
+   REGNUM.  Return true if such an entry exists and false otherwise.  */
+
+static boolean
+group_in_compile_stack (compile_stack, regnum)
+    compile_stack_type compile_stack;
+    regnum_t regnum;
+{
+  int slot = compile_stack.avail - 1;
+
+  while (slot >= 0)
+    {
+      if (compile_stack.stack[slot].regnum == regnum)
+	return true;
+      slot--;
+    }
+
+  return false;
+}
+
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
+   characters can start a string that matches the pattern.  This fastmap
+   is used by re_search to skip quickly over impossible starting points.
+
+   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+   area as BUFP->fastmap.
+
+   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+   the pattern buffer.
+
+   Returns 0 if we succeed, -2 if an internal error.   */
+
+int
+re_compile_fastmap (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  /* Outline: walk the compiled pattern from its first opcode.  Opcodes
+     that match the empty string are stepped over with `continue';
+     the first opcode on a path that must consume a character marks
+     its possible first bytes in FASTMAP and ends that path.
+     Alternatives met on the way (on_failure_jump and friends) are
+     saved on FAIL_STACK and scanned in turn.  */
+  int i, j, k;
+#ifdef MATCH_MAY_ALLOCATE
+  fail_stack_type fail_stack;
+#endif
+#ifndef REGEX_MALLOC
+  char *destination;
+#endif
+  /* We don't push any register information onto the failure stack.  */
+  unsigned num_regs = 0;
+
+  register char *fastmap = bufp->fastmap;
+  unsigned char *pattern = bufp->buffer;
+  unsigned long size = bufp->used;
+  unsigned char *p = pattern;
+  register unsigned char *pend = pattern + size;
+
+  /* This holds the pointer to the failure stack, when
+     it is allocated relocatably.  */
+  fail_stack_elt_t *failure_stack_ptr;
+
+  /* Assume that each path through the pattern can be null until
+     proven otherwise.	We set this false at the bottom of switch
+     statement, to which we get only if a particular path doesn't
+     match the empty string.  */
+  boolean path_can_be_null = true;
+
+  /* We aren't doing a `succeed_n' to begin with.  */
+  boolean succeed_n_p = false;
+
+  /* If all elements for base leading-codes in fastmap are set, this
+     flag is set true.	*/
+  boolean match_any_multibyte_characters = false;
+
+  /* Maximum code of simple (single byte) character. */
+  int simple_char_max;
+
+  assert (fastmap != NULL && p != NULL);
+
+  INIT_FAIL_STACK ();
+  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.	*/
+  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
+  bufp->can_be_null = 0;
+
+  while (1)
+    {
+      if (p == pend || *p == succeed)
+	{
+	  /* We have reached the (effective) end of pattern.  */
+	  if (!FAIL_STACK_EMPTY ())
+	    {
+	      bufp->can_be_null |= path_can_be_null;
+
+	      /* Reset for next path.  */
+	      path_can_be_null = true;
+
+	      /* Pop the most recently saved alternative and resume
+		 scanning from there.  */
+	      p = fail_stack.stack[--fail_stack.avail].pointer;
+
+	      continue;
+	    }
+	  else
+	    break;
+	}
+
+      /* We should never be about to go beyond the end of the pattern.	*/
+      assert (p < pend);
+
+      /* Note that the switch expression advances P past the opcode, so
+	 inside every case P points at the opcode's first operand.  */
+      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
+	{
+
+	/* I guess the idea here is to simply not bother with a fastmap
+	   if a backreference is used, since it's too hard to figure out
+	   the fastmap for the corresponding group.  Setting
+	   `can_be_null' stops `re_search_2' from using the fastmap, so
+	   that is all we do.  */
+	case duplicate:
+	  bufp->can_be_null = 1;
+	  goto done;
+
+
+      /* Following are the cases which match a character.  These end
+	 with `break'.	*/
+
+	case exactn:
+	  /* p[0] is the count byte that regex_compile pushes right
+	     after `exactn'; p[1] is the first byte of the literal.  */
+	  fastmap[p[1]] = 1;
+	  break;
+
+
+#ifndef emacs
+	case charset:
+	  /* *P is the bitmap length in bytes; visit every bit of the
+	     bitmap that follows it.  */
+	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+	      fastmap[j] = 1;
+	  break;
+
+
+	case charset_not:
+	  /* Chars beyond end of map must be allowed.  */
+	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+	    fastmap[j] = 1;
+
+	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+	      fastmap[j] = 1;
+	  break;
+
+
+	case wordchar:
+	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+	    if (SYNTAX (j) == Sword)
+	      fastmap[j] = 1;
+	  break;
+
+
+	case notwordchar:
+	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+	    if (SYNTAX (j) != Sword)
+	      fastmap[j] = 1;
+	  break;
+#else  /* emacs */
+	case charset:
+	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
+	       j >= 0; j--)
+	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+	      fastmap[j] = 1;
+
+	  if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
+	      && match_any_multibyte_characters == false)
+	    {
+	      /* Set fastmap[I] 1 where I is a base leading code of each
+		 multibyte character in the range table. */
+	      int c, count;
+
+	      /* Make P point to the range table. */
+	      p += CHARSET_BITMAP_SIZE (&p[-2]);
+
+	      /* Extract the number of ranges in range table into
+		 COUNT.	 */
+	      EXTRACT_NUMBER_AND_INCR (count, p);
+	      for (; count > 0; count--, p += 2 * 3) /* XXX */
+		{
+		  /* Extract the start of each range.  */
+		  EXTRACT_CHARACTER (c, p);
+		  j = CHAR_CHARSET (c);
+		  fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
+		}
+	    }
+	  break;
+
+
+	case charset_not:
+	  /* Chars beyond end of bitmap are possible matches.
+	     All the single-byte codes can occur in multibyte buffers.
+	     So any that are not listed in the charset
+	     are possible matches, even in multibyte buffers.  */
+	  simple_char_max = (1 << BYTEWIDTH);
+	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
+	       j < simple_char_max; j++)
+	    fastmap[j] = 1;
+
+	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
+	       j >= 0; j--)
+	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       which doesn't match the specified set of characters.  */
+	    {
+	    /* Shared tail: several cases below jump here to mark the
+	       base leading codes, at most once per call.  */
+	    set_fastmap_for_multibyte_characters:
+	      if (match_any_multibyte_characters == false)
+		{
+		  for (j = 0x80; j < 0xA0; j++)	/* XXX */
+		    if (BASE_LEADING_CODE_P (j))
+		      fastmap[j] = 1;
+		  match_any_multibyte_characters = true;
+		}
+	    }
+	  break;
+
+
+	case wordchar:
+	  /* All the single-byte codes can occur in multibyte buffers,
+	     and they may have word syntax.  So do consider them.  */
+	  simple_char_max = (1 << BYTEWIDTH);
+	  for (j = 0; j < simple_char_max; j++)
+	    if (SYNTAX (j) == Sword)
+	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       whose syntax is `Sword'.	 */
+	    goto set_fastmap_for_multibyte_characters;
+	  break;
+
+
+	case notwordchar:
+	  /* All the single-byte codes can occur in multibyte buffers,
+	     and they may not have word syntax.  So do consider them.  */
+	  simple_char_max = (1 << BYTEWIDTH);
+	  for (j = 0; j < simple_char_max; j++)
+	    if (SYNTAX (j) != Sword)
+	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       whose syntax is not `Sword'.  */
+	    goto set_fastmap_for_multibyte_characters;
+	  break;
+#endif
+
+	case anychar:
+	  {
+	    /* Remember the newline entry so it can be restored below.  */
+	    int fastmap_newline = fastmap['\n'];
+
+	    /* `.' matches anything, except perhaps newline.
+	       Even in a multibyte buffer, it should match any
+	       conceivable byte value for the fastmap.  */
+	    if (bufp->multibyte)
+	      match_any_multibyte_characters = true;
+
+	    simple_char_max = (1 << BYTEWIDTH);
+	    for (j = 0; j < simple_char_max; j++)
+	      fastmap[j] = 1;
+
+	    /* ... except perhaps newline.  */
+	    if (!(bufp->syntax & RE_DOT_NEWLINE))
+	      fastmap['\n'] = fastmap_newline;
+
+	    /* Return if we have already set `can_be_null'; if we have,
+	       then the fastmap is irrelevant.	Something's wrong here.	 */
+	    else if (bufp->can_be_null)
+	      goto done;
+
+	    /* Otherwise, have to check alternative paths.  */
+	    break;
+	  }
+
+#ifdef emacs
+	case wordbound:
+	case notwordbound:
+	case wordbeg:
+	case wordend:
+	case notsyntaxspec:
+	case syntaxspec:
+	  /* This match depends on text properties.  These end with
+	     aborting optimizations.  */
+	  bufp->can_be_null = 1;
+	  goto done;
+#if 0
+	  k = *p++;
+	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  for (j = 0; j < simple_char_max; j++)
+	    if (SYNTAX (j) == (enum syntaxcode) k)
+	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       whose syntax is K.  */
+	    goto set_fastmap_for_multibyte_characters;
+	  break;
+
+	case notsyntaxspec:
+	  k = *p++;
+	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  for (j = 0; j < simple_char_max; j++)
+	    if (SYNTAX (j) != (enum syntaxcode) k)
+	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       whose syntax is not K.  */
+	    goto set_fastmap_for_multibyte_characters;
+	  break;
+#endif
+
+
+	case categoryspec:
+	  k = *p++;
+	  simple_char_max = (1 << BYTEWIDTH);
+	  for (j = 0; j < simple_char_max; j++)
+	    if (CHAR_HAS_CATEGORY (j, k))
+	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       whose category is K.  */
+	    goto set_fastmap_for_multibyte_characters;
+	  break;
+
+
+	case notcategoryspec:
+	  k = *p++;
+	  simple_char_max = (1 << BYTEWIDTH);
+	  for (j = 0; j < simple_char_max; j++)
+	    if (!CHAR_HAS_CATEGORY (j, k))
+	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       whose category is not K.	 */
+	    goto set_fastmap_for_multibyte_characters;
+	  break;
+
+      /* All cases after this match the empty string.  These end with
+	 `continue'.  */
+
+
+	case before_dot:
+	case at_dot:
+	case after_dot:
+	  continue;
+#endif /* emacs */
+
+
+	case no_op:
+	case begline:
+	case endline:
+	case begbuf:
+	case endbuf:
+#ifndef emacs
+	case wordbound:
+	case notwordbound:
+	case wordbeg:
+	case wordend:
+#endif
+	case push_dummy_failure:
+	  continue;
+
+
+	case jump_n:
+	case pop_failure_jump:
+	case maybe_pop_jump:
+	case jump:
+	case jump_past_alt:
+	case dummy_failure_jump:
+	  EXTRACT_NUMBER_AND_INCR (j, p);
+	  p += j;
+	  /* A forward jump: just keep scanning at its target.  */
+	  if (j > 0)
+	    continue;
+
+	  /* Jump backward implies we just went through the body of a
+	     loop and matched nothing.	Opcode jumped to should be
+	     `on_failure_jump' or `succeed_n'.	Just treat it like an
+	     ordinary jump.  For a * loop, it has pushed its failure
+	     point already; if so, discard that as redundant.  */
+	  if ((re_opcode_t) *p != on_failure_jump
+	      && (re_opcode_t) *p != succeed_n)
+	    continue;
+
+	  /* Step over that opcode and follow its own jump offset.  */
+	  p++;
+	  EXTRACT_NUMBER_AND_INCR (j, p);
+	  p += j;
+
+	  /* If what's on the stack is where we are now, pop it.  */
+	  if (!FAIL_STACK_EMPTY ()
+	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
+	    fail_stack.avail--;
+
+	  continue;
+
+
+	case on_failure_jump:
+	case on_failure_keep_string_jump:
+	handle_on_failure_jump:
+	  EXTRACT_NUMBER_AND_INCR (j, p);
+
+	  /* For some patterns, e.g., `(a?)?', `p+j' here points to the
+	     end of the pattern.  We don't want to push such a point,
+	     since when we restore it above, entering the switch will
+	     increment `p' past the end of the pattern.	 We don't need
+	     to push such a point since we obviously won't find any more
+	     fastmap entries beyond `pend'.  Such a pattern can match
+	     the null string, though.  */
+	  if (p + j < pend)
+	    {
+	      if (!PUSH_PATTERN_OP (p + j, fail_stack))
+		{
+		  /* The push failed: give up and report the internal
+		     error code.  */
+		  RESET_FAIL_STACK ();
+		  return -2;
+		}
+	    }
+	  else
+	    bufp->can_be_null = 1;
+
+	  /* We arrived from the `succeed_n' case below, whose opcode
+	     carries a second operand after the jump offset.  */
+	  if (succeed_n_p)
+	    {
+	      EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.	*/
+	      succeed_n_p = false;
+	    }
+
+	  continue;
+
+
+	case succeed_n:
+	  /* Get to the number of times to succeed.  */
+	  p += 2;
+
+	  /* Increment p past the n for when k != 0.  */
+	  EXTRACT_NUMBER_AND_INCR (k, p);
+	  if (k == 0)
+	    {
+	      /* Rewind to the jump offset so the on_failure_jump code
+		 can re-read it (this presumes EXTRACT_NUMBER_AND_INCR
+		 advanced P by two bytes -- its definition is not
+		 shown here).  */
+	      p -= 4;
+	      succeed_n_p = true;  /* Spaghetti code alert.  */
+	      goto handle_on_failure_jump;
+	    }
+	  continue;
+
+
+	case set_number_at:
+	  /* Skip the two two-byte operands (see `store_op2').  */
+	  p += 4;
+	  continue;
+
+
+	case start_memory:
+	case stop_memory:
+	  /* Skip the two operand bytes that regex_compile pushes after
+	     these opcodes with BUF_PUSH_3.  */
+	  p += 2;
+	  continue;
+
+
+	default:
+	  abort (); /* We have listed all the cases.  */
+	} /* switch *p++ */
+
+      /* Getting here means we have found the possible starting
+	 characters for one path of the pattern -- and that the empty
+	 string does not match.	 We need not follow this path further.
+	 Instead, look at the next alternative (remembered on the
+	 stack), or quit if no more.  The test at the top of the loop
+	 does these things.  */
+      path_can_be_null = false;
+      p = pend;
+    } /* while p */
+
+  /* Set `can_be_null' for the last path (also the first path, if the
+     pattern is empty).	 */
+  bufp->can_be_null |= path_can_be_null;
+
+ /* Common exit, shared by the normal end of the scan and by the
+    early `goto done' cases above.  */
+ done:
+  RESET_FAIL_STACK ();
+  return 0;
+} /* re_compile_fastmap */
+
+/* Install caller-supplied register storage for later searches and
+   matches that use BUFP and REGS.
+
+   If NUM_REGS is nonzero, REGS is made to hold NUM_REGS registers
+   backed by the STARTS and ENDS arrays, and BUFP is marked
+   REGS_REALLOCATE.  STARTS and ENDS must be allocated using the
+   malloc library routine, and must each be at least
+   NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS == 0, REGS is emptied and BUFP is marked
+   REGS_UNALLOCATED, so subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   BUFP will allocate its own register data, without freeing the old
+   data.  */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+    struct re_pattern_buffer *bufp;
+    struct re_registers *regs;
+    unsigned num_regs;
+    regoff_t *starts, *ends;
+{
+  if (num_regs == 0)
+    {
+      /* Nothing supplied: leave REGS empty and let the matcher
+	 allocate.  */
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->end = (regoff_t *) 0;
+      regs->start = (regoff_t *) 0;
+      return;
+    }
+
+  /* Adopt the caller's arrays.  */
+  bufp->regs_allocated = REGS_REALLOCATE;
+  regs->num_regs = num_regs;
+  regs->start = starts;
+  regs->end = ends;
+}
+
+/* Searching routines.	*/
+
+/* Single-string front end to re_search_2.  STRING (of length SIZE) is
+   passed as the second string, with a null, zero-length first string.
+   The stop position is always SIZE, so the caller cannot say where to
+   stop matching.  Returns whatever re_search_2 returns.  */
+
+int
+re_search (bufp, string, size, startpos, range, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size, startpos, range;
+     struct re_registers *regs;
+{
+  int found;
+
+  found = re_search_2 (bufp, NULL, 0, string, size, startpos, range,
+		       regs, size);
+  return found;
+}
+
+/* End address of whichever string of the virtual concatenation holds
+   position P: the end of string2 when P >= size1, otherwise the end
+   of string1.  Expands to an expression that uses the names string1,
+   size1, string2 and size2 from the enclosing scope.  */
+#define STOP_ADDR_VSTRING(P)				\
+  ((P) >= size1 ? string2 + size2 : string1 + size1)
+
+/* Address of position POS in the virtual concatenation of string1 and
+   string2.  Uses string1, size1 and string2 from the enclosing scope.
+   The offset into string2 is computed as an integer before it is added
+   to the pointer, so that no pointer before the start of string2
+   (`string2 - size1') is ever formed.  POS is evaluated twice.  */
+#define POS_ADDR_VSTRING(POS)					\
+  ((POS) >= size1 ? string2 + ((POS) - size1) : string1 + (POS))
+
+/* Using the compiled pattern in BUFP->buffer, first tries to match the
+   virtual concatenation of STRING1 and STRING2, starting first at index
+   STARTPOS, then at STARTPOS + 1, and so on.
+
+   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+
+   RANGE is how far to scan while trying to match.  RANGE = 0 means try
+   only at STARTPOS; in general, the last start tried is STARTPOS +
+   RANGE.  A negative RANGE scans backwards from STARTPOS.  RANGE is
+   clipped so that no start position falls below 0 or above
+   SIZE1 + SIZE2.
+
+   In REGS, return the indices of the virtual concatenation of STRING1
+   and STRING2 that matched the entire BUFP->buffer and its contained
+   subexpressions.
+
+   Do not consider matching one past the index STOP in the virtual
+   concatenation of STRING1 and STRING2.
+
+   If BUFP has a fastmap that is not marked accurate, it is recompiled
+   first with re_compile_fastmap.
+
+   We return either the position in the strings at which the match was
+   found, -1 if no match (or if STARTPOS is out of range), or -2 if
+   error (such as failure stack overflow).  */
+
+int
+re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int size1, size2;
+     int startpos;
+     int range;
+     struct re_registers *regs;
+     int stop;
+{
+  int val;
+  register char *fastmap = bufp->fastmap;
+  register RE_TRANSLATE_TYPE translate = bufp->translate;
+  int total_size = size1 + size2;
+  int endpos = startpos + range;
+  int anchored_start = 0;
+
+  /* Nonzero if we have to handle multibyte characters.  */
+  int multibyte = bufp->multibyte;
+
+  /* Check for out-of-range STARTPOS.  */
+  if (startpos < 0 || startpos > total_size)
+    return -1;
+
+  /* Fix up RANGE if it might eventually take us outside
+     the virtual concatenation of STRING1 and STRING2.
+     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
+  if (endpos < 0)
+    range = 0 - startpos;
+  else if (endpos > total_size)
+    range = total_size - startpos;
+
+  /* If the search isn't to be a backwards one, don't waste time in a
+     search for a pattern anchored at beginning of buffer.  */
+  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+    {
+      if (startpos > 0)
+	return -1;
+      else
+	range = 0;
+    }
+
+#ifdef emacs
+  /* In a forward search for something that starts with \=.
+     don't keep searching past point.  */
+  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
+    {
+      range = PT_BYTE - BEGV_BYTE - startpos;
+      if (range < 0)
+	return -1;
+    }
+#endif /* emacs */
+
+  /* Update the fastmap now if not correct already.  */
+  if (fastmap && !bufp->fastmap_accurate)
+    if (re_compile_fastmap (bufp) == -2)
+      return -2;
+
+  /* See whether the pattern is anchored.  Only inspect the first
+     opcode when the pattern is non-empty, as the begbuf test above
+     does: with `used == 0' the byte at buffer[0] is not part of the
+     pattern, and treating it as `begline' would make the loop below
+     skip valid start positions.  */
+  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begline)
+    anchored_start = 1;
+
+#ifdef emacs
+  gl_state.object = re_match_object;
+  {
+    int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
+    int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos);
+
+    SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
+  }
+#endif
+
+  /* Loop through the string, looking for a place to start matching.  */
+  for (;;)
+    {
+      /* If the pattern is anchored,
+	 skip quickly past places we cannot match.
+	 We don't bother to treat startpos == 0 specially
+	 because that case doesn't repeat.  */
+      if (anchored_start && startpos > 0)
+	{
+	  /* The character just before STARTPOS lives in string1 when
+	     startpos <= size1, otherwise in string2.  */
+	  if (! (bufp->newline_anchor
+		 && ((startpos <= size1 ? string1[startpos - 1]
+		      : string2[startpos - size1 - 1])
+		     == '\n')))
+	    goto advance;
+	}
+
+      /* If a fastmap is supplied, skip quickly over characters that
+	 cannot be the start of a match.  If the pattern can match the
+	 null string, however, we don't need to skip characters; we want
+	 the first null string.	 */
+      if (fastmap && startpos < total_size && !bufp->can_be_null)
+	{
+	  register const char *d;
+	  register unsigned int buf_ch;
+
+	  d = POS_ADDR_VSTRING (startpos);
+
+	  if (range > 0)	/* Searching forwards.	*/
+	    {
+	      /* LIM is the value RANGE has when D reaches the end of
+		 string1; the scan below never crosses into string2.  */
+	      register int lim = 0;
+	      int irange = range;
+
+	      if (startpos < size1 && startpos + range >= size1)
+		lim = range - (size1 - startpos);
+
+	      /* Written out as an if-else to avoid testing `translate'
+		 inside the loop.  */
+	      if (RE_TRANSLATE_P (translate))
+		{
+		  if (multibyte)
+		    while (range > lim)
+		      {
+			int buf_charlen;
+
+			buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
+							 buf_charlen);
+
+			buf_ch = RE_TRANSLATE (translate, buf_ch);
+			if (buf_ch >= 0400
+			    || fastmap[buf_ch])
+			  break;
+
+			range -= buf_charlen;
+			d += buf_charlen;
+		      }
+		  else
+		    while (range > lim
+			   && !fastmap[(unsigned char)
+				       RE_TRANSLATE (translate, (unsigned char) *d)])
+		      {
+			d++;
+			range--;
+		      }
+		}
+	      else
+		while (range > lim && !fastmap[(unsigned char) *d])
+		  {
+		    d++;
+		    range--;
+		  }
+
+	      /* Advance STARTPOS by however much of RANGE was consumed.  */
+	      startpos += irange - range;
+	    }
+	  else				/* Searching backwards.	 */
+	    {
+	      int room = (size1 == 0 || startpos >= size1
+			  ? size2 + size1 - startpos
+			  : size1 - startpos);
+
+	      buf_ch = STRING_CHAR (d, room);
+	      if (RE_TRANSLATE_P (translate))
+		buf_ch = RE_TRANSLATE (translate, buf_ch);
+
+	      if (! (buf_ch >= 0400
+		     || fastmap[buf_ch]))
+		goto advance;
+	    }
+	}
+
+      /* If can't match the null string, and that's all we have left, fail.  */
+      if (range >= 0 && startpos == total_size && fastmap
+	  && !bufp->can_be_null)
+	return -1;
+
+      val = re_match_2_internal (bufp, string1, size1, string2, size2,
+				 startpos, regs, stop);
+#ifndef REGEX_MALLOC
+#ifdef C_ALLOCA
+      alloca (0);
+#endif
+#endif
+
+      /* A match at STARTPOS: report where it started, not its length.  */
+      if (val >= 0)
+	return startpos;
+
+      if (val == -2)
+	return -2;
+
+    advance:
+      if (!range)
+	break;
+      else if (range > 0)
+	{
+	  /* Update STARTPOS to the next character boundary.  */
+	  if (multibyte)
+	    {
+	      const unsigned char *p
+		= (const unsigned char *) POS_ADDR_VSTRING (startpos);
+	      const unsigned char *pend
+		= (const unsigned char *) STOP_ADDR_VSTRING (startpos);
+	      int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
+
+	      range -= len;
+	      if (range < 0)
+		break;
+	      startpos += len;
+	    }
+	  else
+	    {
+	      range--;
+	      startpos++;
+	    }
+	}
+      else
+	{
+	  range++;
+	  startpos--;
+
+	  /* Update STARTPOS to the previous character boundary.  */
+	  if (multibyte)
+	    {
+	      const unsigned char *p
+		= (const unsigned char *) POS_ADDR_VSTRING (startpos);
+	      int len = 0;
+
+	      /* Find the head of multibyte form.  */
+	      while (!CHAR_HEAD_P (*p))
+		p--, len++;
+
+	      /* Adjust it. */
+#if 0				/* XXX */
+	      if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1))
+		;
+	      else
+#endif
+		{
+		  range += len;
+		  if (range > 0)
+		    break;
+
+		  startpos -= len;
+		}
+	    }
+	}
+    }
+  return -1;
+} /* re_search_2 */
+
+/* Declarations and macros for re_match_2.  */
+
+/* Old-style (non-prototype) forward declarations of file-local helpers;
+   their definitions are not in this part of the file.  */
+static int bcmp_translate ();
+static boolean alt_match_null_string_p (),
+	       common_op_match_null_string_p (),
+	       group_match_null_string_p ();
+
+/* This converts PTR, a pointer into one of the search strings `string1'
+   and `string2', into an offset.  For a pointer into string1 the offset
+   is counted from the start of string1; for a pointer into string2,
+   `size1' is added, so the result is an index into the virtual
+   concatenation of the two strings.  Relies on FIRST_STRING_P (defined
+   elsewhere) and on `string1', `string2' and `size1' being in scope.  */
+#define POINTER_TO_OFFSET(ptr)			\
+  (FIRST_STRING_P (ptr)				\
+   ? ((regoff_t) ((ptr) - string1))		\
+   : ((regoff_t) ((ptr) - string2 + size1)))
+
+/* Macros for dealing with the split strings in re_match_2.  */
+
+/* True while the current string end `dend' is the match limit of
+   string1, i.e. `d' is still scanning the first string.  */
+#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
+
+/* Call before fetching a character with *d.  This switches over to
+   string2 if necessary.  It uses `d', `dend', `string2' and
+   `end_match_2' from the enclosing scope, and jumps to the label
+   `fail' there when the data is exhausted.  Written as a loop, so it
+   rechecks `d == dend' after switching to string2.  */
+#define PREFETCH()							\
+  while (d == dend)							\
+    {									\
+      /* End of string2 => fail.  */					\
+      if (dend == end_match_2)						\
+	goto fail;							\
+      /* End of string1 => advance to string2.	*/			\
+      d = string2;							\
+      dend = end_match_2;						\
+    }
+
+
+/* Test if at very beginning or at very end of the virtual concatenation
+   of `string1' and `string2'.	If only one string, it's `string2'.
+   AT_STRINGS_BEG compares D with string1 when size1 is nonzero and
+   with string2 otherwise; it is also true whenever size2 is zero.
+   AT_STRINGS_END compares D with `end2' only.  */
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END(d) ((d) == end2)
+
+
+/* Test if D points to a character which is word-constituent.  We have
+   two special cases to check for: if past the end of string1, look at
+   the first character in string2; and if before the beginning of
+   string2, look at the last character in string1.  Uses SYNTAX and
+   Sword (defined elsewhere) and `end1' and `string2' from the
+   enclosing scope.  D is evaluated more than once.  */
+#define WORDCHAR_P(d)							\
+  (SYNTAX ((d) == end1 ? *string2					\
+	   : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
+   == Sword)
+
+/* AT_WORD_BOUNDARY, below, is disabled due to a compiler bug -- see
+   the comment at case wordbound.  */
+
+/* The comment at case wordbound is the following one, but we no longer
+   use AT_WORD_BOUNDARY at all, in order to support multibyte forms.
+
+   The DEC Alpha C compiler 3.x generates incorrect code for the
+   test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
+   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
+   macro and introducing temporary variables works around the bug.  */
+
+#if 0
+/* Test if the character before D and the one at D differ with respect
+   to being word-constituent.  */
+#define AT_WORD_BOUNDARY(d)						\
+  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
+   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
+#endif
+
+/* Free everything we malloc.
+
+   When MATCH_MAY_ALLOCATE is defined, FREE_VARIABLES releases the
+   failure stack through REGEX_FREE_STACK and then each of the nine
+   register vectors through FREE_VAR; all of those names must be in
+   scope where it is expanded.  Otherwise it expands to a no-op.  */
+#ifdef MATCH_MAY_ALLOCATE
+/* Free VAR if it is non-null and reset it to NULL.  The expansion ends
+   in a dangling `else', so it must be followed by a statement -- the
+   `;' at each call site serves as the empty else branch.  */
+#define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
+#define FREE_VARIABLES()						\
+  do {									\
+    REGEX_FREE_STACK (fail_stack.stack);				\
+    FREE_VAR (regstart);						\
+    FREE_VAR (regend);							\
+    FREE_VAR (old_regstart);						\
+    FREE_VAR (old_regend);						\
+    FREE_VAR (best_regstart);						\
+    FREE_VAR (best_regend);						\
+    FREE_VAR (reg_info);						\
+    FREE_VAR (reg_dummy);						\
+    FREE_VAR (reg_info_dummy);						\
+  } while (0)
+#else
+#define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
+#endif /* not MATCH_MAY_ALLOCATE */
+
+/* Sentinel values meaning "no register is active".
+
+   These values must meet several constraints.  They must not be valid
+   register values; since we have a limit of 255 registers (because
+   we use only one byte in the pattern for the register number), we can
+   use numbers larger than 255.  They must differ by 1, because of
+   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
+   be larger than the value for the highest register, so we do not try
+   to actually save any registers when none are active.
+
+   NOTE(review): 1 << BYTEWIDTH is presumably 256, given the one-byte
+   register number described above -- confirm against the definition
+   of BYTEWIDTH.  */
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
+
+/* Matching routines.  */
+
+#ifndef emacs	/* Emacs never uses this.  */
+/* Single-string variant of re_match_2.  STRING (of length SIZE) is
+   passed as the second string, with a null, zero-length first string;
+   matching starts at POS and the stop position is SIZE.  Returns
+   whatever re_match_2_internal returns.  */
+
+int
+re_match (bufp, string, size, pos, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size, pos;
+     struct re_registers *regs;
+{
+  int match_len;
+
+  match_len = re_match_2_internal (bufp, NULL, 0, string, size,
+				   pos, regs, size);
+
+  /* With the C alloca emulation in use (and no REGEX_MALLOC), force
+     an alloca cleanup now that the matcher has returned.  */
+#if !defined (REGEX_MALLOC) && defined (C_ALLOCA)	/* CVS */
+  alloca (0);
+#endif							/* CVS */
+
+  return match_len;
+}
+#endif /* not emacs */
+
+#ifdef emacs
+/* In Emacs, this is the string or buffer in which we
+   are matching.  It is used for looking up syntax properties:
+   re_search_2 and re_match_2 copy it into gl_state.object and pass it
+   to SETUP_SYNTAX_TABLE_FOR_OBJECT before matching.  */
+Lisp_Object re_match_object;
+#endif
+
+/* re_match_2 matches the compiled pattern in BUFP against the
+   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
+   and SIZE2, respectively).  We start matching at POS, and stop
+   matching at STOP.
+
+   If REGS is non-null and the `no_sub' field of BUFP is zero, we
+   store offsets for the substring each group matched in REGS.	See the
+   documentation for exactly how many groups we fill.
+
+   We return -1 if no match, -2 if an internal error (such as the
+   failure stack overflowing).	Otherwise, we return the length of the
+   matched substring.
+
+   The real work is done by re_match_2_internal; this wrapper only
+   sets up the Emacs syntax-table state beforehand (under `emacs') and
+   forces an alloca cleanup afterwards.  */
+
+int
+re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int size1, size2;
+     int pos;
+     struct re_registers *regs;
+     int stop;
+{
+  int match_len;
+
+#ifdef emacs
+  {
+    /* Prepare syntax-table lookups for the object being matched.  */
+    int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
+    int charpos;
+
+    gl_state.object = re_match_object;
+    charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos);
+    SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
+  }
+#endif
+
+  match_len = re_match_2_internal (bufp, string1, size1, string2, size2,
+				   pos, regs, stop);
+
+#if !defined (REGEX_MALLOC) && defined (C_ALLOCA)	/* CVS */
+  alloca (0);
+#endif							/* CVS */
+
+  return match_len;
+}
+
+/* This is a separate function so that we can force an alloca cleanup
+   afterwards.	*/
+static int
+re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int size1, size2;
+     int pos;
+     struct re_registers *regs;
+     int stop;
+{
+  /* General temporaries.  */
+  int mcnt;
+  unsigned char *p1;
+
+  /* Just past the end of the corresponding string.  */
+  const char *end1, *end2;
+
+  /* Pointers into string1 and string2, just past the last characters in
+     each to consider matching.	 */
+  const char *end_match_1, *end_match_2;
+
+  /* Where we are in the data, and the end of the current string.  */
+  const char *d, *dend;
+
+  /* Where we are in the pattern, and the end of the pattern.  */
+  unsigned char *p = bufp->buffer;
+  register unsigned char *pend = p + bufp->used;
+
+  /* Mark the opcode just after a start_memory, so we can test for an
+     empty subpattern when we get to the stop_memory.  */
+  unsigned char *just_past_start_mem = 0;
+
+  /* We use this to map every character in the string.	*/
+  RE_TRANSLATE_TYPE translate = bufp->translate;
+
+  /* Nonzero if we have to concern multibyte character.	 */
+  int multibyte = bufp->multibyte;
+
+  /* Failure point stack.  Each place that can handle a failure further
+     down the line pushes a failure point on this stack.  It consists of
+     restart, regend, and reg_info for all registers corresponding to
+     the subexpressions we're currently inside, plus the number of such
+     registers, and, finally, two char *'s.  The first char * is where
+     to resume scanning the pattern; the second one is where to resume
+     scanning the strings.  If the latter is zero, the failure point is
+     a ``dummy''; if a failure happens and the failure point is a dummy,
+     it gets discarded and the next next one is tried.	*/
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
+  fail_stack_type fail_stack;
+#endif
+#ifdef DEBUG
+  static unsigned failure_id = 0;
+  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+#endif
+
+  /* This holds the pointer to the failure stack, when
+     it is allocated relocatably.  */
+  fail_stack_elt_t *failure_stack_ptr;
+
+  /* We fill all the registers internally, independent of what we
+     return, for use in backreferences.	 The number here includes
+     an element for register zero.  */
+  unsigned num_regs = bufp->re_nsub + 1;
+
+  /* The currently active registers.  */
+  unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+  unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+
+  /* Information on the contents of registers. These are pointers into
+     the input strings; they record just what was matched (on this
+     attempt) by a subexpression part of the pattern, that is, the
+     regnum-th regstart pointer points to where in the pattern we began
+     matching and the regnum-th regend points to right after where we
+     stopped matching the regnum-th subexpression.  (The zeroth register
+     keeps track of what the whole pattern matches.)  */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
+  const char **regstart, **regend;
+#endif
+
+  /* If a group that's operated upon by a repetition operator fails to
+     match anything, then the register for its start will need to be
+     restored because it will have been set to wherever in the string we
+     are when we last see its open-group operator.  Similarly for a
+     register's end.  */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
+  const char **old_regstart, **old_regend;
+#endif
+
+  /* The is_active field of reg_info helps us keep track of which (possibly
+     nested) subexpressions we are currently in. The matched_something
+     field of reg_info[reg_num] helps us tell whether or not we have
+     matched any of the pattern so far this time through the reg_num-th
+     subexpression.  These two fields get reset each time through any
+     loop their register is in.	 */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
+  register_info_type *reg_info;
+#endif
+
+  /* The following record the register info as found in the above
+     variables when we find a match better than any we've seen before.
+     This happens as we backtrack through the failure points, which in
+     turn happens only if we have not yet matched the entire string. */
+  unsigned best_regs_set = false;
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
+  const char **best_regstart, **best_regend;
+#endif
+
+  /* Logically, this is `best_regend[0]'.  But we don't want to have to
+     allocate space for that if we're not allocating space for anything
+     else (see below).	Also, we never need info about register 0 for
+     any of the other register vectors, and it seems rather a kludge to
+     treat `best_regend' differently than the rest.  So we keep track of
+     the end of the best match so far in a separate variable.  We
+     initialize this to NULL so that when we backtrack the first time
+     and need to test it, it's not garbage.  */
+  const char *match_end = NULL;
+
+  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
+  int set_regs_matched_done = 0;
+
+  /* Used when we pop values we don't care about.  */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
+  const char **reg_dummy;
+  register_info_type *reg_info_dummy;
+#endif
+
+#ifdef DEBUG
+  /* Counts the total number of registers pushed.  */
+  unsigned num_regs_pushed = 0;
+#endif
+
+  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
+
+  INIT_FAIL_STACK ();
+
+#ifdef MATCH_MAY_ALLOCATE
+  /* Do not bother to initialize all the register variables if there are
+     no groups in the pattern, as it takes a fair amount of time.  If
+     there are groups, we include space for register 0 (the whole
+     pattern), even though we never use it, since it simplifies the
+     array indexing.  We should fix this.  */
+  if (bufp->re_nsub)
+    {
+      regstart = REGEX_TALLOC (num_regs, const char *);
+      regend = REGEX_TALLOC (num_regs, const char *);
+      old_regstart = REGEX_TALLOC (num_regs, const char *);
+      old_regend = REGEX_TALLOC (num_regs, const char *);
+      best_regstart = REGEX_TALLOC (num_regs, const char *);
+      best_regend = REGEX_TALLOC (num_regs, const char *);
+      reg_info = REGEX_TALLOC (num_regs, register_info_type);
+      reg_dummy = REGEX_TALLOC (num_regs, const char *);
+      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
+
+      if (!(regstart && regend && old_regstart && old_regend && reg_info
+	    && best_regstart && best_regend && reg_dummy && reg_info_dummy))
+	{
+	  FREE_VARIABLES ();
+	  return -2;
+	}
+    }
+  else
+    {
+      /* We must initialize all our variables to NULL, so that
+	 `FREE_VARIABLES' doesn't try to free them.  */
+      regstart = regend = old_regstart = old_regend = best_regstart
+	= best_regend = reg_dummy = NULL;
+      reg_info = reg_info_dummy = (register_info_type *) NULL;
+    }
+#endif /* MATCH_MAY_ALLOCATE */
+
+  /* The starting position is bogus.  */
+  if (pos < 0 || pos > size1 + size2)
+    {
+      FREE_VARIABLES ();
+      return -1;
+    }
+
+  /* Initialize subexpression text positions to -1 to mark ones that no
+     start_memory/stop_memory has been seen for. Also initialize the
+     register information struct.  */
+  for (mcnt = 1; mcnt < num_regs; mcnt++)
+    {
+      regstart[mcnt] = regend[mcnt]
+	= old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
+
+      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+      IS_ACTIVE (reg_info[mcnt]) = 0;
+      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+    }
+
+  /* We move `string1' into `string2' if the latter's empty -- but not if
+     `string1' is null.	 */
+  if (size2 == 0 && string1 != NULL)
+    {
+      string2 = string1;
+      size2 = size1;
+      string1 = 0;
+      size1 = 0;
+    }
+  end1 = string1 + size1;
+  end2 = string2 + size2;
+
+  /* Compute where to stop matching, within the two strings.  */
+  if (stop <= size1)
+    {
+      end_match_1 = string1 + stop;
+      end_match_2 = string2;
+    }
+  else
+    {
+      end_match_1 = end1;
+      end_match_2 = string2 + stop - size1;
+    }
+
+  /* `p' scans through the pattern as `d' scans through the data.
+     `dend' is the end of the input string that `d' points within.  `d'
+     is advanced into the following input string whenever necessary, but
+     this happens before fetching; therefore, at the beginning of the
+     loop, `d' can be pointing at the end of a string, but it cannot
+     equal `string2'.  */
+  if (size1 > 0 && pos <= size1)
+    {
+      d = string1 + pos;
+      dend = end_match_1;
+    }
+  else
+    {
+      d = string2 + pos - size1;
+      dend = end_match_2;
+    }
+
+  DEBUG_PRINT1 ("The compiled pattern is: ");
+  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
+  DEBUG_PRINT1 ("The string to match is: `");
+  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
+  DEBUG_PRINT1 ("'\n");
+
+  /* This loops over pattern commands.	It exits by returning from the
+     function if the match is complete, or it drops through if the match
+     fails at this starting point in the input data.  */
+  for (;;)
+    {
+      DEBUG_PRINT2 ("\n0x%x: ", p);
+
+      if (p == pend)
+	{ /* End of pattern means we might have succeeded.  */
+	  DEBUG_PRINT1 ("end of pattern ... ");
+
+	  /* If we haven't matched the entire string, and we want the
+	     longest match, try backtracking.  */
+	  if (d != end_match_2)
+	    {
+	      /* 1 if this match ends in the same string (string1 or string2)
+		 as the best previous match.  */
+	      boolean same_str_p = (FIRST_STRING_P (match_end)
+				    == MATCHING_IN_FIRST_STRING);
+	      /* 1 if this match is the best seen so far.  */
+	      boolean best_match_p;
+
+	      /* AIX compiler got confused when this was combined
+		 with the previous declaration.	 */
+	      if (same_str_p)
+		best_match_p = d > match_end;
+	      else
+		best_match_p = !MATCHING_IN_FIRST_STRING;
+
+	      DEBUG_PRINT1 ("backtracking.\n");
+
+	      if (!FAIL_STACK_EMPTY ())
+		{ /* More failure points to try.  */
+
+		  /* If exceeds best match so far, save it.  */
+		  if (!best_regs_set || best_match_p)
+		    {
+		      best_regs_set = true;
+		      match_end = d;
+
+		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
+
+		      for (mcnt = 1; mcnt < num_regs; mcnt++)
+			{
+			  best_regstart[mcnt] = regstart[mcnt];
+			  best_regend[mcnt] = regend[mcnt];
+			}
+		    }
+		  goto fail;
+		}
+
+	      /* If no failure points, don't restore garbage.  And if
+		 last match is real best match, don't restore second
+		 best one. */
+	      else if (best_regs_set && !best_match_p)
+		{
+		restore_best_regs:
+		  /* Restore best match.  It may happen that `dend ==
+		     end_match_1' while the restored d is in string2.
+		     For example, the pattern `x.*y.*z' against the
+		     strings `x-' and `y-z-', if the two strings are
+		     not consecutive in memory.	 */
+		  DEBUG_PRINT1 ("Restoring best registers.\n");
+
+		  d = match_end;
+		  dend = ((d >= string1 && d <= end1)
+			   ? end_match_1 : end_match_2);
+
+		  for (mcnt = 1; mcnt < num_regs; mcnt++)
+		    {
+		      regstart[mcnt] = best_regstart[mcnt];
+		      regend[mcnt] = best_regend[mcnt];
+		    }
+		}
+	    } /* d != end_match_2 */
+
+	succeed_label:
+	  DEBUG_PRINT1 ("Accepting match.\n");
+
+	  /* If caller wants register contents data back, do it.  */
+	  if (regs && !bufp->no_sub)
+	    {
+	      /* Have the register data arrays been allocated?	*/
+	      if (bufp->regs_allocated == REGS_UNALLOCATED)
+		{ /* No.  So allocate them with malloc.	 We need one
+		     extra element beyond `num_regs' for the `-1' marker
+		     GNU code uses.  */
+		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
+		  regs->start = TALLOC (regs->num_regs, regoff_t);
+		  regs->end = TALLOC (regs->num_regs, regoff_t);
+		  if (regs->start == NULL || regs->end == NULL)
+		    {
+		      FREE_VARIABLES ();
+		      return -2;
+		    }
+		  bufp->regs_allocated = REGS_REALLOCATE;
+		}
+	      else if (bufp->regs_allocated == REGS_REALLOCATE)
+		{ /* Yes.  If we need more elements than were already
+		     allocated, reallocate them.  If we need fewer, just
+		     leave it alone.  */
+		  if (regs->num_regs < num_regs + 1)
+		    {
+		      regs->num_regs = num_regs + 1;
+		      RETALLOC (regs->start, regs->num_regs, regoff_t);
+		      RETALLOC (regs->end, regs->num_regs, regoff_t);
+		      if (regs->start == NULL || regs->end == NULL)
+			{
+			  FREE_VARIABLES ();
+			  return -2;
+			}
+		    }
+		}
+	      else
+		{
+		  /* These braces fend off a "empty body in an else-statement"
+		     warning under GCC when assert expands to nothing.	*/
+		  assert (bufp->regs_allocated == REGS_FIXED);
+		}
+
+	      /* Convert the pointer data in `regstart' and `regend' to
+		 indices.  Register zero has to be set differently,
+		 since we haven't kept track of any info for it.  */
+	      if (regs->num_regs > 0)
+		{
+		  regs->start[0] = pos;
+		  regs->end[0] = (MATCHING_IN_FIRST_STRING
+				  ? ((regoff_t) (d - string1))
+				  : ((regoff_t) (d - string2 + size1)));
+		}
+
+	      /* Go through the first `min (num_regs, regs->num_regs)'
+		 registers, since that is all we initialized.  */
+	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
+		{
+		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
+		    regs->start[mcnt] = regs->end[mcnt] = -1;
+		  else
+		    {
+		      regs->start[mcnt]
+			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
+		      regs->end[mcnt]
+			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
+		    }
+		}
+
+	      /* If the regs structure we return has more elements than
+		 were in the pattern, set the extra elements to -1.  If
+		 we (re)allocated the registers, this is the case,
+		 because we always allocate enough to have at least one
+		 -1 at the end.	 */
+	      for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
+		regs->start[mcnt] = regs->end[mcnt] = -1;
+	    } /* regs && !bufp->no_sub */
+
+	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
+			nfailure_points_pushed, nfailure_points_popped,
+			nfailure_points_pushed - nfailure_points_popped);
+	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
+
+	  mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+			    ? string1
+			    : string2 - size1);
+
+	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+
+	  FREE_VARIABLES ();
+	  return mcnt;
+	}
+
+      /* Otherwise match next pattern command.	*/
+      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
+	{
+	/* Ignore these.  Used to ignore the n of succeed_n's which
+	   currently have n == 0.  */
+	case no_op:
+	  DEBUG_PRINT1 ("EXECUTING no_op.\n");
+	  break;
+
+	case succeed:
+	  DEBUG_PRINT1 ("EXECUTING succeed.\n");
+	  goto succeed_label;
+
+	/* Match the next n pattern characters exactly.	 The following
+	   byte in the pattern defines n, and the n bytes after that
+	   are the characters to match.	 */
+	case exactn:
+	  mcnt = *p++;
+	  DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
+
+	  /* This is written out as an if-else so we don't waste time
+	     testing `translate' inside the loop.  */
+	  if (RE_TRANSLATE_P (translate))
+	    {
+#ifdef emacs
+	      if (multibyte)
+		do
+		  {
+		    int pat_charlen, buf_charlen;
+		    unsigned int pat_ch, buf_ch;
+
+		    PREFETCH ();
+		    pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
+		    buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+
+		    if (RE_TRANSLATE (translate, buf_ch)
+			!= pat_ch)
+		      goto fail;
+
+		    p += pat_charlen;
+		    d += buf_charlen;
+		    mcnt -= pat_charlen;
+		  }
+		while (mcnt > 0);
+	      else
+#endif /* not emacs */
+		do
+		  {
+		    PREFETCH ();
+		    if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d)
+			!= (unsigned char) *p++)
+		      goto fail;
+		    d++;
+		  }
+		while (--mcnt);
+	    }
+	  else
+	    {
+	      do
+		{
+		  PREFETCH ();
+		  if (*d++ != (char) *p++) goto fail;
+		}
+	      while (--mcnt);
+	    }
+	  SET_REGS_MATCHED ();
+	  break;
+
+
+	/* Match any character except possibly a newline or a null.  */
+	case anychar:
+	  {
+	    int buf_charlen;
+	    unsigned int buf_ch;
+
+	    DEBUG_PRINT1 ("EXECUTING anychar.\n");
+
+	    PREFETCH ();
+
+#ifdef emacs
+	    if (multibyte)
+	      buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+	    else
+#endif /* not emacs */
+	      {
+		buf_ch = (unsigned char) *d;
+		buf_charlen = 1;
+	      }
+
+	    buf_ch = TRANSLATE (buf_ch);
+
+	    if ((!(bufp->syntax & RE_DOT_NEWLINE)
+		 && buf_ch == '\n')
+		|| ((bufp->syntax & RE_DOT_NOT_NULL)
+		    && buf_ch == '\000'))
+	      goto fail;
+
+	    SET_REGS_MATCHED ();
+	    DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
+	    d += buf_charlen;
+	  }
+	  break;
+
+
+	case charset:
+	case charset_not:
+	  {
+	    register unsigned int c;
+	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
+	    int len;
+
+	    /* Start of actual range_table, or end of bitmap if there is no
+	       range table.  */
+	    unsigned char *range_table;
+
+	    /* Nonzero if there is range table.	 */
+	    int range_table_exists;
+
+	    /* Number of ranges of range table.	 Not in bytes.	*/
+	    int count;
+
+	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+
+	    PREFETCH ();
+	    c = (unsigned char) *d;
+
+	    range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
+	    range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
+	    if (range_table_exists)
+	      EXTRACT_NUMBER_AND_INCR (count, range_table);
+	    else
+	      count = 0;
+
+	    if (multibyte && BASE_LEADING_CODE_P (c))
+	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+
+	    if (SINGLE_BYTE_CHAR_P (c))
+	      {			/* Lookup bitmap.  */
+		c = TRANSLATE (c); /* The character to match.  */
+		len = 1;
+
+		/* Cast to `unsigned' instead of `unsigned char' in
+		   case the bit list is a full 32 bytes long.  */
+		if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
+		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+	      not = !not;
+	      }
+	    else if (range_table_exists)
+	      CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
+
+	    p = CHARSET_RANGE_TABLE_END (range_table, count);
+
+	    if (!not) goto fail;
+
+	    SET_REGS_MATCHED ();
+	    d += len;
+	    break;
+	  }
+
+
+	/* The beginning of a group is represented by start_memory.
+	   The arguments are the register number in the next byte, and the
+	   number of groups inner to this one in the next.  The text
+	   matched within the group is recorded (in the internal
+	   registers data structure) under the register number.	 */
+	case start_memory:
+	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
+
+	  /* Find out if this group can match the empty string.	 */
+	  p1 = p;		/* To send to group_match_null_string_p.  */
+
+	  if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
+	    REG_MATCH_NULL_STRING_P (reg_info[*p])
+	      = group_match_null_string_p (&p1, pend, reg_info);
+
+	  /* Save the position in the string where we were the last time
+	     we were at this open-group operator in case the group is
+	     operated upon by a repetition operator, e.g., with `(a*)*b'
+	     against `ab'; then we want to ignore where we are now in
+	     the string in case this attempt to match fails.  */
+	  old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+			     ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
+			     : regstart[*p];
+	  DEBUG_PRINT2 ("  old_regstart: %d\n",
+			 POINTER_TO_OFFSET (old_regstart[*p]));
+
+	  regstart[*p] = d;
+	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
+
+	  IS_ACTIVE (reg_info[*p]) = 1;
+	  MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+	  /* Clear this whenever we change the register activity status.  */
+	  set_regs_matched_done = 0;
+
+	  /* This is the new highest active register.  */
+	  highest_active_reg = *p;
+
+	  /* If nothing was active before, this is the new lowest active
+	     register.	*/
+	  if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+	    lowest_active_reg = *p;
+
+	  /* Move past the register number and inner group count.  */
+	  p += 2;
+	  just_past_start_mem = p;
+
+	  break;
+
+
+	/* The stop_memory opcode represents the end of a group.  Its
+	   arguments are the same as start_memory's: the register
+	   number, and the number of inner groups.  */
+	case stop_memory:
+	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
+
+	  /* We need to save the string position the last time we were at
+	     this close-group operator in case the group is operated
+	     upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+	     against `aba'; then we want to ignore where we are now in
+	     the string in case this attempt to match fails.  */
+	  old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+			   ? REG_UNSET (regend[*p]) ? d : regend[*p]
+			   : regend[*p];
+	  DEBUG_PRINT2 ("      old_regend: %d\n",
+			 POINTER_TO_OFFSET (old_regend[*p]));
+
+	  regend[*p] = d;
+	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
+
+	  /* This register isn't active anymore.  */
+	  IS_ACTIVE (reg_info[*p]) = 0;
+
+	  /* Clear this whenever we change the register activity status.  */
+	  set_regs_matched_done = 0;
+
+	  /* If this was the only register active, nothing is active
+	     anymore.  */
+	  if (lowest_active_reg == highest_active_reg)
+	    {
+	      lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+	      highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+	    }
+	  else
+	    { /* We must scan for the new highest active register, since
+		 it isn't necessarily one less than now: consider
+		 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
+		 new highest active register is 1.  */
+	      unsigned char r = *p - 1;
+	      while (r > 0 && !IS_ACTIVE (reg_info[r]))
+		r--;
+
+	      /* If we end up at register zero, that means that we saved
+		 the registers as the result of an `on_failure_jump', not
+		 a `start_memory', and we jumped to past the innermost
+		 `stop_memory'.	 For example, in ((.)*) we save
+		 registers 1 and 2 as a result of the *, but when we pop
+		 back to the second ), we are at the stop_memory 1.
+		 Thus, nothing is active.  */
+	      if (r == 0)
+		{
+		  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+		  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+		}
+	      else
+		highest_active_reg = r;
+	    }
+
+	  /* If just failed to match something this time around with a
+	     group that's operated on by a repetition operator, try to
+	     force exit from the ``loop'', and restore the register
+	     information for this group that we had before trying this
+	     last match.  */
+	  if ((!MATCHED_SOMETHING (reg_info[*p])
+	       || just_past_start_mem == p - 1)
+	      && (p + 2) < pend)
+	    {
+	      boolean is_a_jump_n = false;
+
+	      p1 = p + 2;
+	      mcnt = 0;
+	      switch ((re_opcode_t) *p1++)
+		{
+		  case jump_n:
+		    is_a_jump_n = true;
+		  case pop_failure_jump:
+		  case maybe_pop_jump:
+		  case jump:
+		  case dummy_failure_jump:
+		    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+		    if (is_a_jump_n)
+		      p1 += 2;
+		    break;
+
+		  default:
+		    /* do nothing */ ;
+		}
+	      p1 += mcnt;
+
+	      /* If the next operation is a jump backwards in the pattern
+		 to an on_failure_jump right before the start_memory
+		 corresponding to this stop_memory, exit from the loop
+		 by forcing a failure after pushing on the stack the
+		 on_failure_jump's jump in the pattern, and d.	*/
+	      if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
+		  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
+		{
+		  /* If this group ever matched anything, then restore
+		     what its registers were before trying this last
+		     failed match, e.g., with `(a*)*b' against `ab' for
+		     regstart[1], and, e.g., with `((a*)*(b*)*)*'
+		     against `aba' for regend[3].
+
+		     Also restore the registers for inner groups for,
+		     e.g., `((a*)(b*))*' against `aba' (register 3 would
+		     otherwise get trashed).  */
+
+		  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
+		    {
+		      unsigned r;
+
+		      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+		      /* Restore this and inner groups' (if any) registers.  */
+		      for (r = *p; r < *p + *(p + 1); r++)
+			{
+			  regstart[r] = old_regstart[r];
+
+			  /* xx why this test?	*/
+			  if (old_regend[r] >= regstart[r])
+			    regend[r] = old_regend[r];
+			}
+		    }
+		  p1++;
+		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+		  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
+
+		  goto fail;
+		}
+	    }
+
+	  /* Move past the register number and the inner group count.  */
+	  p += 2;
+	  break;
+
+
+	/* \<digit> has been turned into a `duplicate' command which is
+	   followed by the numeric value of <digit> as the register number.  */
+	case duplicate:
+	  {
+	    register const char *d2, *dend2;
+	    int regno = *p++;	/* Get which register to match against.	 */
+	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
+
+	    /* Can't back reference a group which we've never matched.	*/
+	    if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
+	      goto fail;
+
+	    /* Where in input to try to start matching.	 */
+	    d2 = regstart[regno];
+
+	    /* Where to stop matching; if both the place to start and
+	       the place to stop matching are in the same string, then
+	       set to the place to stop, otherwise, for now have to use
+	       the end of the first string.  */
+
+	    dend2 = ((FIRST_STRING_P (regstart[regno])
+		      == FIRST_STRING_P (regend[regno]))
+		     ? regend[regno] : end_match_1);
+	    for (;;)
+	      {
+		/* If necessary, advance to next segment in register
+		   contents.  */
+		while (d2 == dend2)
+		  {
+		    if (dend2 == end_match_2) break;
+		    if (dend2 == regend[regno]) break;
+
+		    /* End of string1 => advance to string2. */
+		    d2 = string2;
+		    dend2 = regend[regno];
+		  }
+		/* At end of register contents => success */
+		if (d2 == dend2) break;
+
+		/* If necessary, advance to next segment in data.  */
+		PREFETCH ();
+
+		/* How many characters left in this segment to match.  */
+		mcnt = dend - d;
+
+		/* Want how many consecutive characters we can match in
+		   one shot, so, if necessary, adjust the count.  */
+		if (mcnt > dend2 - d2)
+		  mcnt = dend2 - d2;
+
+		/* Compare that many; failure if mismatch, else move
+		   past them.  */
+		if (RE_TRANSLATE_P (translate)
+		    ? bcmp_translate (d, d2, mcnt, translate)
+		    : bcmp (d, d2, mcnt))
+		  goto fail;
+		d += mcnt, d2 += mcnt;
+
+		/* Do this because we've matched some characters.  */
+		SET_REGS_MATCHED ();
+	      }
+	  }
+	  break;
+
+
+	/* begline matches the empty string at the beginning of the string
+	   (unless `not_bol' is set in `bufp'), and, if
+	   `newline_anchor' is set, after newlines.  */
+	case begline:
+	  DEBUG_PRINT1 ("EXECUTING begline.\n");
+
+	  if (AT_STRINGS_BEG (d))
+	    {
+	      if (!bufp->not_bol) break;
+	    }
+	  else if (d[-1] == '\n' && bufp->newline_anchor)
+	    {
+	      break;
+	    }
+	  /* In all other cases, we fail.  */
+	  goto fail;
+
+
+	/* endline is the dual of begline.  */
+	case endline:
+	  DEBUG_PRINT1 ("EXECUTING endline.\n");
+
+	  if (AT_STRINGS_END (d))
+	    {
+	      if (!bufp->not_eol) break;
+	    }
+
+	  /* We have to ``prefetch'' the next character.  */
+	  else if ((d == end1 ? *string2 : *d) == '\n'
+		   && bufp->newline_anchor)
+	    {
+	      break;
+	    }
+	  goto fail;
+
+
+	/* Match at the very beginning of the data.  */
+	case begbuf:
+	  DEBUG_PRINT1 ("EXECUTING begbuf.\n");
+	  if (AT_STRINGS_BEG (d))
+	    break;
+	  goto fail;
+
+
+	/* Match at the very end of the data.  */
+	case endbuf:
+	  DEBUG_PRINT1 ("EXECUTING endbuf.\n");
+	  if (AT_STRINGS_END (d))
+	    break;
+	  goto fail;
+
+
+	/* on_failure_keep_string_jump is used to optimize `.*\n'.  It
+	   pushes NULL as the value for the string on the stack.  Then
+	   `pop_failure_point' will keep the current value for the
+	   string, instead of restoring it.  To see why, consider
+	   matching `foo\nbar' against `.*\n'.	The .* matches the foo;
+	   then the . fails against the \n.  But the next thing we want
+	   to do is match the \n against the \n; if we restored the
+	   string value, we would be back at the foo.
+
+	   Because this is used only in specific cases, we don't need to
+	   check all the things that `on_failure_jump' does, to make
+	   sure the right things get saved on the stack.  Hence we don't
+	   share its code.  The only reason to push anything on the
+	   stack at all is that otherwise we would have to change
+	   `anychar's code to do something besides goto fail in this
+	   case; that seems worse than this.  */
+	case on_failure_keep_string_jump:
+	  DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
+
+	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
+	  DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
+
+	  PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
+	  break;
+
+
+	/* Uses of on_failure_jump:
+
+	   Each alternative starts with an on_failure_jump that points
+	   to the beginning of the next alternative.  Each alternative
+	   except the last ends with a jump that in effect jumps past
+	   the rest of the alternatives.  (They really jump to the
+	   ending jump of the following alternative, because tensioning
+	   these jumps is a hassle.)
+
+	   Repeats start with an on_failure_jump that points past both
+	   the repetition text and either the following jump or
+	   pop_failure_jump back to this on_failure_jump.  */
+	case on_failure_jump:
+	on_failure:
+	  DEBUG_PRINT1 ("EXECUTING on_failure_jump");
+
+#if defined (WINDOWSNT) && defined (emacs)
+	  QUIT;
+#endif
+
+	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
+	  DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
+
+	  /* If this on_failure_jump comes right before a group (i.e.,
+	     the original * applied to a group), save the information
+	     for that group and all inner ones, so that if we fail back
+	     to this point, the group's information will be correct.
+	     For example, in \(a*\)*\1, we need the preceding group,
+	     and in \(zz\(a*\)b*\)\2, we need the inner group.	*/
+
+	  /* We can't use `p' to check ahead because we push
+	     a failure point to `p + mcnt' after we do this.  */
+	  p1 = p;
+
+	  /* We need to skip no_op's before we look for the
+	     start_memory in case this on_failure_jump is happening as
+	     the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
+	     against aba.  */
+	  while (p1 < pend && (re_opcode_t) *p1 == no_op)
+	    p1++;
+
+	  if (p1 < pend && (re_opcode_t) *p1 == start_memory)
+	    {
+	      /* We have a new highest active register now.  This will
+		 get reset at the start_memory we are about to get to,
+		 but we will have saved all the registers relevant to
+		 this repetition op, as described above.  */
+	      highest_active_reg = *(p1 + 1) + *(p1 + 2);
+	      if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+		lowest_active_reg = *(p1 + 1);
+	    }
+
+	  DEBUG_PRINT1 (":\n");
+	  PUSH_FAILURE_POINT (p + mcnt, d, -2);
+	  break;
+
+
+	/* A smart repeat ends with `maybe_pop_jump'.
+	   We change it to either `pop_failure_jump' or `jump'.	 */
+	case maybe_pop_jump:
+#if defined (WINDOWSNT) && defined (emacs)
+	  QUIT;
+#endif
+	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
+	  DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
+	  {
+	    register unsigned char *p2 = p;
+
+	    /* Compare the beginning of the repeat with what in the
+	       pattern follows its end.  If we can establish that there
+	       is nothing that they would both match (unlike, e.g.,
+	       `a*a', where backtracking is needed), then we can change
+	       to pop_failure_jump, because we'll never have to
+	       backtrack.
+
+	       This is not true in the case of alternatives: in
+	       `(a|ab)*' we do need to backtrack to the `ab' alternative
+	       (e.g., if the string was `ab').	But instead of trying to
+	       detect that here, the alternative has put on a dummy
+	       failure point which is what we will end up popping.  */
+
+	    /* Skip over open/close-group commands.
+	       If what follows this loop is a ...+ construct,
+	       look at what begins its body, since we will have to
+	       match at least one of that.  */
+	    while (1)
+	      {
+		if (p2 + 2 < pend
+		    && ((re_opcode_t) *p2 == stop_memory
+			|| (re_opcode_t) *p2 == start_memory))
+		  p2 += 3;
+		else if (p2 + 6 < pend
+			 && (re_opcode_t) *p2 == dummy_failure_jump)
+		  p2 += 6;
+		else
+		  break;
+	      }
+
+	    p1 = p + mcnt;
+	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+	       to the `maybe_pop_jump' of this case.  Examine what
+	       follows.  */
+
+	    /* If we're at the end of the pattern, we can change.  */
+	    if (p2 == pend)
+	      {
+		/* Consider what happens when matching ":\(.*\)"
+		   against ":/".  I don't really understand this code
+		   yet.	 */
+		p[-3] = (unsigned char) pop_failure_jump;
+		DEBUG_PRINT1
+		  ("  End of pattern: change to `pop_failure_jump'.\n");
+	      }
+
+	    else if ((re_opcode_t) *p2 == exactn
+		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
+	      {
+		register unsigned int c
+		  = *p2 == (unsigned char) endline ? '\n' : p2[2];
+
+		if ((re_opcode_t) p1[3] == exactn)
+		  {
+		    if (!(multibyte /* && (c != '\n') */
+			  && BASE_LEADING_CODE_P (c))
+			? c != p1[5]
+			: (STRING_CHAR (&p2[2], pend - &p2[2])
+			   != STRING_CHAR (&p1[5], pend - &p1[5])))
+		  {
+		    p[-3] = (unsigned char) pop_failure_jump;
+		    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
+				  c, p1[5]);
+		  }
+		  }
+
+		else if ((re_opcode_t) p1[3] == charset
+			 || (re_opcode_t) p1[3] == charset_not)
+		  {
+		    int not = (re_opcode_t) p1[3] == charset_not;
+
+		    if (multibyte /* && (c != '\n') */
+			&& BASE_LEADING_CODE_P (c))
+		      c = STRING_CHAR (&p2[2], pend - &p2[2]);
+
+		    /* Test if C is listed in charset (or charset_not)
+		       at `&p1[3]'.  */
+		    if (SINGLE_BYTE_CHAR_P (c))
+		      {
+			if (c < CHARSET_BITMAP_SIZE (&p1[3]) * BYTEWIDTH
+			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+		      not = !not;
+		      }
+		    else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3]))
+		      CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]);
+
+		    /* `not' is equal to 1 if c would match, which means
+			that we can't change to pop_failure_jump.  */
+		    if (!not)
+		      {
+			p[-3] = (unsigned char) pop_failure_jump;
+			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
+		      }
+		  }
+	      }
+	    else if ((re_opcode_t) *p2 == charset)
+	      {
+		if ((re_opcode_t) p1[3] == exactn)
+		  {
+		    register unsigned int c = p1[5];
+		    int not = 0;
+
+		    if (multibyte && BASE_LEADING_CODE_P (c))
+		      c = STRING_CHAR (&p1[5], pend - &p1[5]);
+
+		    /* Test if C is listed in charset at `p2'.	*/
+		    if (SINGLE_BYTE_CHAR_P (c))
+		      {
+			if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH
+			    && (p2[2 + c / BYTEWIDTH]
+				& (1 << (c % BYTEWIDTH))))
+			  not = !not;
+		      }
+		    else if (CHARSET_RANGE_TABLE_EXISTS_P (p2))
+		      CHARSET_LOOKUP_RANGE_TABLE (not, c, p2);
+
+		    if (!not)
+		  {
+		    p[-3] = (unsigned char) pop_failure_jump;
+			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
+		      }
+		  }
+
+		/* It is hard to enumerate all the characters in charset
+		   P2 if it includes multibyte characters.  Give up in
+		   that case.  */
+		else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
+		  {
+		    /* Now, we are sure that P2 has no range table.
+		       So, for the size of the bitmap in P2, `p2[1]' is
+		       enough.  But P1 may have a range table, so the
+		       size of the bitmap table of P1 is extracted by
+		       using the macro `CHARSET_BITMAP_SIZE'.
+
+		       Since we know that all the characters listed in
+		       P2 are ASCII, it is enough to test only the bitmap
+		       table of P1.  */
+
+		    if ((re_opcode_t) p1[3] == charset_not)
+		  {
+		    int idx;
+			/* We win if the charset_not inside the loop lists
+			   every character listed in the charset after.	 */
+		    for (idx = 0; idx < (int) p2[1]; idx++)
+		      if (! (p2[2 + idx] == 0
+				 || (idx < CHARSET_BITMAP_SIZE (&p1[3])
+				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
+			break;
+
+		    if (idx == p2[1])
+		      {
+			p[-3] = (unsigned char) pop_failure_jump;
+			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
+		      }
+		  }
+		else if ((re_opcode_t) p1[3] == charset)
+		  {
+		    int idx;
+		    /* We win if the charset inside the loop
+		       has no overlap with the one after the loop.  */
+		    for (idx = 0;
+			     (idx < (int) p2[1]
+			      && idx < CHARSET_BITMAP_SIZE (&p1[3]));
+			 idx++)
+		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
+			break;
+
+			if (idx == p2[1]
+			    || idx == CHARSET_BITMAP_SIZE (&p1[3]))
+		      {
+			p[-3] = (unsigned char) pop_failure_jump;
+			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
+		      }
+		  }
+	      }
+	  }
+	  }
+	  p -= 2;		/* Point at relative address again.  */
+	  if ((re_opcode_t) p[-1] != pop_failure_jump)
+	    {
+	      p[-1] = (unsigned char) jump;
+	      DEBUG_PRINT1 ("  Match => jump.\n");
+	      goto unconditional_jump;
+	    }
+	/* Note fall through.  */
+
+
+	/* The end of a simple repeat has a pop_failure_jump back to
+	   its matching on_failure_jump, where the latter will push a
+	   failure point.  The pop_failure_jump takes off failure
+	   points put on by this pop_failure_jump's matching
+	   on_failure_jump; we got through the pattern to here from the
+	   matching on_failure_jump, so didn't fail.  */
+	case pop_failure_jump:
+	  {
+	    /* We need to pass separate storage for the lowest and
+	       highest registers, even though we don't care about the
+	       actual values.  Otherwise, we will restore only one
+	       register from the stack, since lowest will == highest in
+	       `pop_failure_point'.  */
+	    unsigned dummy_low_reg, dummy_high_reg;
+	    unsigned char *pdummy;
+	    const char *sdummy;
+
+	    DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
+	    POP_FAILURE_POINT (sdummy, pdummy,
+			       dummy_low_reg, dummy_high_reg,
+			       reg_dummy, reg_dummy, reg_info_dummy);
+	  }
+	  /* Note fall through.	 */
+
+
+	/* Unconditionally jump (without popping any failure points).  */
+	case jump:
+	unconditional_jump:
+#if defined (WINDOWSNT) && defined (emacs)
+	  QUIT;
+#endif
+	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
+	  DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
+	  p += mcnt;				/* Do the jump.	 */
+	  DEBUG_PRINT2 ("(to 0x%x).\n", p);
+	  break;
+
+
+	/* We need this opcode so we can detect where alternatives end
+	   in `group_match_null_string_p' et al.  */
+	case jump_past_alt:
+	  DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
+	  goto unconditional_jump;
+
+
+	/* Normally, the on_failure_jump pushes a failure point, which
+	   then gets popped at pop_failure_jump.  We will end up at
+	   pop_failure_jump, also, and with a pattern of, say, `a+', we
+	   are skipping over the on_failure_jump, so we have to push
+	   something meaningless for pop_failure_jump to pop.  */
+	case dummy_failure_jump:
+	  DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
+	  /* It doesn't matter what we push for the string here.  What
+	     the code at `fail' tests is the value for the pattern.  */
+	  PUSH_FAILURE_POINT (0, 0, -2);
+	  goto unconditional_jump;
+
+
+	/* At the end of an alternative, we need to push a dummy failure
+	   point in case we are followed by a `pop_failure_jump', because
+	   we don't want the failure point for the alternative to be
+	   popped.  For example, matching `(a|ab)*' against `aab'
+	   requires that we match the `ab' alternative.	 */
+	case push_dummy_failure:
+	  DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
+	  /* See comments just above at `dummy_failure_jump' about the
+	     two zeroes.  */
+	  PUSH_FAILURE_POINT (0, 0, -2);
+	  break;
+
+	/* Have to succeed matching what follows at least n times.
+	   After that, handle like `on_failure_jump'.  */
+	case succeed_n:
+	  EXTRACT_NUMBER (mcnt, p + 2);
+	  DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
+
+	  assert (mcnt >= 0);
+	  /* Originally, this is how many times we HAVE to succeed.  */
+	  if (mcnt > 0)
+	    {
+	       mcnt--;
+	       p += 2;
+	       STORE_NUMBER_AND_INCR (p, mcnt);
+	       DEBUG_PRINT3 ("	Setting 0x%x to %d.\n", p, mcnt);
+	    }
+	  else if (mcnt == 0)
+	    {
+	      DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
+	      p[2] = (unsigned char) no_op;
+	      p[3] = (unsigned char) no_op;
+	      goto on_failure;
+	    }
+	  break;
+
+	case jump_n:
+	  EXTRACT_NUMBER (mcnt, p + 2);
+	  DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
+
+	  /* Originally, this is how many times we CAN jump.  */
+	  if (mcnt)
+	    {
+	       mcnt--;
+	       STORE_NUMBER (p + 2, mcnt);
+	       goto unconditional_jump;
+	    }
+	  /* If don't have to jump any more, skip over the rest of command.  */
+	  else
+	    p += 4;
+	  break;
+
+	case set_number_at:
+	  {
+	    DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
+
+	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
+	    p1 = p + mcnt;
+	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
+	    DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
+	    STORE_NUMBER (p1, mcnt);
+	    break;
+	  }
+
+	case wordbound:
+	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+
+	  /* We SUCCEED in one of the following cases: */
+
+	  /* Case 1: D is at the beginning or the end of string.  */
+	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
+	    break;
+	  else
+	    {
+	      /* C1 is the character before D, S1 is the syntax of C1, C2
+		 is the character at D, and S2 is the syntax of C2.  */
+	      int c1, c2, s1, s2;
+	      int pos1 = PTR_TO_OFFSET (d - 1);
+	      int charpos;
+
+	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+	      UPDATE_SYNTAX_TABLE (charpos);
+#endif
+	      s1 = SYNTAX (c1);
+#ifdef emacs
+	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+#endif
+	      s2 = SYNTAX (c2);
+
+	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
+		  ((s1 == Sword) != (s2 == Sword))
+		  /* Case 3: Both of S1 and S2 are Sword, and macro
+		     WORD_BOUNDARY_P (C1, C2) returns nonzero.	*/
+		  || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
+	    break;
+	}
+	  goto fail;
+
+      case notwordbound:
+	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+
+	  /* We FAIL in one of the following cases: */
+
+	  /* Case 1: D is at the beginning or the end of string.  */
+	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
+	    goto fail;
+	  else
+	    {
+	      /* C1 is the character before D, S1 is the syntax of C1, C2
+		 is the character at D, and S2 is the syntax of C2.  */
+	      int c1, c2, s1, s2;
+	      int pos1 = PTR_TO_OFFSET (d - 1);
+	      int charpos;
+
+	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+	      UPDATE_SYNTAX_TABLE (charpos);
+#endif
+	      s1 = SYNTAX (c1);
+#ifdef emacs
+	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+#endif
+	      s2 = SYNTAX (c2);
+
+	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
+		  ((s1 == Sword) != (s2 == Sword))
+		  /* Case 3: Both of S1 and S2 are Sword, and macro
+		     WORD_BOUNDARY_P (C1, C2) returns nonzero.	*/
+		  || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
+	    goto fail;
+	}
+	  break;
+
+	case wordbeg:
+	  DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+
+	  /* We FAIL in one of the following cases: */
+
+	  /* Case 1: D is at the end of string.	 */
+	  if (AT_STRINGS_END (d))
+	  goto fail;
+	  else
+	    {
+	      /* C1 is the character before D, S1 is the syntax of C1, C2
+		 is the character at D, and S2 is the syntax of C2.  */
+	      int c1, c2, s1, s2;
+	      int pos1 = PTR_TO_OFFSET (d);
+	      int charpos;
+
+	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+	      UPDATE_SYNTAX_TABLE (charpos);
+#endif
+	      s2 = SYNTAX (c2);
+	
+	      /* Case 2: S2 is not Sword. */
+	      if (s2 != Sword)
+		goto fail;
+
+	      /* Case 3: D is not at the beginning of string ... */
+	      if (!AT_STRINGS_BEG (d))
+		{
+		  GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+		  UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
+#endif
+		  s1 = SYNTAX (c1);
+
+		  /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
+		     returns 0.	 */
+		  if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
+		    goto fail;
+		}
+	    }
+	  break;
+
+	case wordend:
+	  DEBUG_PRINT1 ("EXECUTING wordend.\n");
+
+	  /* We FAIL in one of the following cases: */
+
+	  /* Case 1: D is at the beginning of string.  */
+	  if (AT_STRINGS_BEG (d))
+	    goto fail;
+	  else
+	    {
+	      /* C1 is the character before D, S1 is the syntax of C1, C2
+		 is the character at D, and S2 is the syntax of C2.  */
+	      int c1, c2, s1, s2;
+	      int pos1 = PTR_TO_OFFSET (d);
+	      int charpos;
+
+	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1);
+	      UPDATE_SYNTAX_TABLE (charpos);
+#endif
+	      s1 = SYNTAX (c1);
+
+	      /* Case 2: S1 is not Sword.  */
+	      if (s1 != Sword)
+		goto fail;
+
+	      /* Case 3: D is not at the end of string ... */
+	      if (!AT_STRINGS_END (d))
+		{
+		  GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+		  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+#endif
+		  s2 = SYNTAX (c2);
+
+		  /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
+		     returns 0.	 */
+		  if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
+	  goto fail;
+		}
+	    }
+	  break;
+
+#ifdef emacs
+	case before_dot:
+	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
+	  if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE)
+	    goto fail;
+	  break;
+
+	case at_dot:
+	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+	  if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE)
+	    goto fail;
+	  break;
+
+	case after_dot:
+	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
+	  if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE)
+	    goto fail;
+	  break;
+
+	case syntaxspec:
+	  DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
+	  mcnt = *p++;
+	  goto matchsyntax;
+
+	case wordchar:
+	  DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
+	  mcnt = (int) Sword;
+	matchsyntax:
+	  PREFETCH ();
+#ifdef emacs
+	  {
+	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+	    UPDATE_SYNTAX_TABLE (pos1);
+	  }
+#endif
+	  {
+	    int c, len;
+
+	    if (multibyte)
+	      /* we must take the multibyte form into account, ... */
+	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+	    else
+	      /* everything should be handled as ASCII, even though it
+		 looks like multibyte form.  */
+	      c = *d, len = 1;
+
+	    if (SYNTAX (c) != (enum syntaxcode) mcnt)
+	    goto fail;
+	    d += len;
+	  }
+	  SET_REGS_MATCHED ();
+	  break;
+
+	case notsyntaxspec:
+	  DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
+	  mcnt = *p++;
+	  goto matchnotsyntax;
+
+	case notwordchar:
+	  DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
+	  mcnt = (int) Sword;
+	matchnotsyntax:
+	  PREFETCH ();
+#ifdef emacs
+	  {
+	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+	    UPDATE_SYNTAX_TABLE (pos1);
+	  }
+#endif
+	  {
+	    int c, len;
+
+	    if (multibyte)
+	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+	    else
+	      c = *d, len = 1;
+
+	    if (SYNTAX (c) == (enum syntaxcode) mcnt)
+	    goto fail;
+	    d += len;
+	  }
+	  SET_REGS_MATCHED ();
+	  break;
+
+	case categoryspec:
+	  DEBUG_PRINT2 ("EXECUTING categoryspec %d.\n", *p);
+	  mcnt = *p++;
+	  PREFETCH ();
+	  {
+	    int c, len;
+
+	    if (multibyte)
+	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+	    else
+	      c = *d, len = 1;
+
+	    if (!CHAR_HAS_CATEGORY (c, mcnt))
+	      goto fail;
+	    d += len;
+	  }
+	  SET_REGS_MATCHED ();
+	  break;
+
+	case notcategoryspec:
+	  DEBUG_PRINT2 ("EXECUTING notcategoryspec %d.\n", *p);
+	  mcnt = *p++;
+	  PREFETCH ();
+	  {
+	    int c, len;
+
+	    if (multibyte)
+	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+	    else
+	      c = *d, len = 1;
+
+	    if (CHAR_HAS_CATEGORY (c, mcnt))
+	      goto fail;
+	    d += len;
+	  }
+	  SET_REGS_MATCHED ();
+          break;
+
+#else /* not emacs */
+	case wordchar:
+          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+	  PREFETCH ();
+          if (!WORDCHAR_P (d))
+            goto fail;
+	  SET_REGS_MATCHED ();
+          d++;
+	  break;
+
+	case notwordchar:
+          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+	  PREFETCH ();
+	  if (WORDCHAR_P (d))
+            goto fail;
+          SET_REGS_MATCHED ();
+          d++;
+	  break;
+#endif /* not emacs */
+
+        default:
+          abort ();
+	}
+      continue;  /* Successfully executed one pattern command; keep going.  */
+
+
+    /* We goto here if a matching operation fails. */
+    fail:
+#if defined (WINDOWSNT) && defined (emacs)
+      QUIT;
+#endif
+      if (!FAIL_STACK_EMPTY ())
+	{ /* A restart point is known.  Restore to that state.  */
+          DEBUG_PRINT1 ("\nFAIL:\n");
+          POP_FAILURE_POINT (d, p,
+                             lowest_active_reg, highest_active_reg,
+                             regstart, regend, reg_info);
+
+          /* If this failure point is a dummy, try the next one.  */
+          if (!p)
+	    goto fail;
+
+          /* If we failed to the end of the pattern, don't examine *p.  */
+	  assert (p <= pend);
+          if (p < pend)
+            {
+              boolean is_a_jump_n = false;
+
+              /* If failed to a backwards jump that's part of a repetition
+                 loop, need to pop this failure point and use the next one.  */
+              switch ((re_opcode_t) *p)
+                {
+                case jump_n:
+                  is_a_jump_n = true;
+                case maybe_pop_jump:
+                case pop_failure_jump:
+                case jump:
+                  p1 = p + 1;
+                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                  p1 += mcnt;
+
+                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+                      || (!is_a_jump_n
+                          && (re_opcode_t) *p1 == on_failure_jump))
+                    goto fail;
+                  break;
+                default:
+                  /* do nothing */ ;
+                }
+            }
+
+          if (d >= string1 && d <= end1)
+	    dend = end_match_1;
+        }
+      else
+        break;   /* Matching at this starting point really fails.  */
+    } /* for (;;) */
+
+  if (best_regs_set)
+    goto restore_best_regs;
+
+  FREE_VARIABLES ();
+
+  return -1;         			/* Failure to match.  */
+} /* re_match_2 */
+
+/* Subroutine definitions for re_match_2.  */
+
+
+/* We are passed P pointing to a register number after a start_memory.
+
+   Return true if the pattern up to the corresponding stop_memory can
+   match the empty string, and false otherwise.
+
+   If we find the matching stop_memory, sets P to point to one past its number.
+   Otherwise, sets P to an undefined byte less than or equal to END.
+
+   We don't handle duplicates properly (yet).  */
+
+static boolean
+group_match_null_string_p (p, end, reg_info)
+    unsigned char **p, *end;
+    register_info_type *reg_info;
+{
+  /* Scratch for the two-byte numbers decoded from the pattern.  */
+  int mcnt;
+  /* Point to after the args to the start_memory.  */
+  unsigned char *p1 = *p + 2;
+
+  while (p1 < end)
+    {
+      /* Skip over opcodes that can match nothing, and return true or
+	 false, as appropriate, when we get to one that can't, or to the
+         matching stop_memory.  */
+
+      switch ((re_opcode_t) *p1)
+        {
+        /* Could be either a loop or a series of alternatives.  */
+        case on_failure_jump:
+          p1++;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+          /* If the next operation is not a jump backwards in the
+	     pattern.  */
+
+	  if (mcnt >= 0)
+	    {
+              /* Go through the on_failure_jumps of the alternatives,
+                 seeing if any of the alternatives cannot match nothing.
+                 The last alternative starts with only a jump,
+                 whereas the rest start with on_failure_jump and end
+                 with a jump, e.g., here is the pattern for `a|b|c':
+
+                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+                 /exactn/1/c
+
+                 So, we have to first go through the first (n-1)
+                 alternatives and then deal with the last one separately.  */
+
+
+              /* Deal with the first (n-1) alternatives, which start
+                 with an on_failure_jump (see above) that jumps to right
+                 past a jump_past_alt.  */
+
+              /* p1[mcnt-3] is the opcode byte of the three-byte
+                 instruction that ends immediately before the jump
+                 target p1 + mcnt.  */
+              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+                {
+                  /* `mcnt' holds how many bytes long the alternative
+                     is, including the ending `jump_past_alt' and
+                     its number.  */
+
+                  /* Only the alternative's own body is examined; the
+                     trailing jump_past_alt is excluded from the range.  */
+                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+				                      reg_info))
+                    return false;
+
+                  /* Move to right after this alternative, including the
+		     jump_past_alt.  */
+                  p1 += mcnt;
+
+                  /* Break if it's the beginning of an n-th alternative
+                     that doesn't begin with an on_failure_jump.  */
+                  if ((re_opcode_t) *p1 != on_failure_jump)
+                    break;
+
+		  /* Still have to check that it's not an n-th
+		     alternative that starts with an on_failure_jump.  */
+		  p1++;
+                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+                    {
+		      /* Get to the beginning of the n-th alternative.  */
+                      /* (Backs up over the opcode byte and the number
+                         just read, so p1 is on the on_failure_jump
+                         again.)  */
+                      p1 -= 3;
+                      break;
+                    }
+                }
+
+              /* Deal with the last alternative: go back and get number
+                 of the `jump_past_alt' just before it.  `mcnt' contains
+                 the length of the alternative.  */
+              EXTRACT_NUMBER (mcnt, p1 - 2);
+
+              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+                return false;
+
+              p1 += mcnt;	/* Get past the n-th alternative.  */
+            } /* if mcnt >= 0 */
+          /* For a backward jump (mcnt < 0) nothing more is skipped here;
+             p1 already sits just past the on_failure_jump's number.  */
+          break;
+
+
+        case stop_memory:
+          /* The register number stored after this stop_memory must be
+             the one *P pointed at on entry.  */
+	  assert (p1[1] == **p);
+          /* Report to the caller the position just past the stop_memory
+             opcode and its register number.  */
+          *p = p1 + 2;
+          return true;
+
+
+        default:
+          /* Everything else is delegated; the helper advances p1 past
+             the op when it can match the empty string.  */
+          if (!common_op_match_null_string_p (&p1, end, reg_info))
+            return false;
+        }
+    } /* while p1 < end */
+
+  /* Reached END without meeting a stop_memory.  */
+  return false;
+} /* group_match_null_string_p */
+
+
+/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
+   It expects P to be the first byte of a single alternative and END one
+   byte past the last. The alternative can contain groups.  */
+
+static boolean
+alt_match_null_string_p (p, end, reg_info)
+    unsigned char *p, *end;
+    register_info_type *reg_info;
+{
+  unsigned char *cursor;
+  int jump_len;
+
+  /* Walk the alternative one op at a time.  The walk stops with false
+     at the first op that cannot match the empty string, and with true
+     once CURSOR has reached END.  */
+  for (cursor = p; cursor < end; )
+    {
+      if ((re_opcode_t) *cursor == on_failure_jump)
+        {
+          /* It's a loop: step over the opcode, decode the jump
+             distance, and continue from the jump's destination.  */
+          cursor++;
+          EXTRACT_NUMBER_AND_INCR (jump_len, cursor);
+          cursor += jump_len;
+          continue;
+        }
+
+      /* Any other op is judged by the shared helper, which also
+         advances CURSOR past the op on success.  */
+      if (!common_op_match_null_string_p (&cursor, end, reg_info))
+        return false;
+    }
+
+  return true;
+} /* alt_match_null_string_p */
+
+
+/* Deals with the ops common to group_match_null_string_p and
+   alt_match_null_string_p.
+
+   Sets P to one after the op and its arguments, if any.  */
+
+static boolean
+common_op_match_null_string_p (p, end, reg_info)
+    unsigned char **p, *end;
+    register_info_type *reg_info;
+{
+  /* Scratch for numbers decoded from the pattern.  */
+  int mcnt;
+  /* Result of the recursive check on a nested group.  */
+  boolean ret;
+  /* Register number of a nested group.  */
+  int reg_no;
+  /* Working cursor; stored back through P only on the success path.  */
+  unsigned char *p1 = *p;
+
+  /* The opcode byte is consumed here, so in every case below p1 points
+     at the op's first argument (if it has one).  */
+  switch ((re_opcode_t) *p1++)
+    {
+    /* These ops are treated as able to match the empty string; only
+       the opcode byte is skipped for them.  */
+    case no_op:
+    case begline:
+    case endline:
+    case begbuf:
+    case endbuf:
+    case wordbeg:
+    case wordend:
+    case wordbound:
+    case notwordbound:
+#ifdef emacs
+    case before_dot:
+    case at_dot:
+    case after_dot:
+#endif
+      break;
+
+    case start_memory:
+      /* p1 is on the group's register number, which is what
+         group_match_null_string_p expects to be handed.  */
+      reg_no = *p1;
+      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
+      ret = group_match_null_string_p (&p1, end, reg_info);
+
+      /* Have to set this here in case we're checking a group which
+         contains a group and a back reference to it.  */
+
+      /* An already-recorded answer for this register is left alone.  */
+      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
+        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+
+      if (!ret)
+        return false;
+      break;
+
+    /* If this is an optimized succeed_n for zero times, make the jump.  */
+    case jump:
+      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+      /* Only forward jumps are followed; a backward jump is reported as
+         unable to match the empty string.  */
+      if (mcnt >= 0)
+        p1 += mcnt;
+      else
+        return false;
+      break;
+
+    case succeed_n:
+      /* Get to the number of times to succeed.  */
+      p1 += 2;
+      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+      if (mcnt == 0)
+        {
+          /* Zero required repetitions: go back to the first number
+             after the opcode (assuming each number occupies two bytes,
+             as the `p1 += 2' above implies), decode it, and take it as
+             a jump distance.  */
+          p1 -= 4;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+          p1 += mcnt;
+        }
+      else
+        return false;
+      break;
+
+    case duplicate:
+      /* A back reference can match the empty string only if the group
+         it refers to was recorded as able to.  */
+      /* NOTE(review): p1 is not advanced past the register-number byte
+         here, so on success *P is left pointing at that byte -- confirm
+         this is intended.  */
+      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+        return false;
+      break;
+
+    case set_number_at:
+      p1 += 4;
+      /* Falls through to the default case, which returns false without
+         storing p1 back, so the adjustment above has no visible
+         effect.  */
+
+    default:
+      /* All other opcodes mean we cannot match the empty string.  */
+      return false;
+  }
+
+  /* Success: tell the caller where the next op starts.  */
+  *p = p1;
+  return true;
+} /* common_op_match_null_string_p */
+
+
+/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
+   bytes; nonzero otherwise.  */
+
+static int
+bcmp_translate (s1, s2, len, translate)
+     unsigned char *s1, *s2;
+     register int len;
+     RE_TRANSLATE_TYPE translate;
+{
+  unsigned char *lhs = s1;
+  unsigned char *rhs = s2;
+  unsigned char *lhs_limit = s1 + len;
+  unsigned char *rhs_limit = s2 + len;
+
+  /* Compare one character from each side per iteration, after passing
+     both through TRANSLATE, until either side reaches its limit.  */
+  for (;;)
+    {
+      int lhs_len, rhs_len;
+      int lhs_char, rhs_char;
+
+      if (lhs == lhs_limit || rhs == rhs_limit)
+	break;
+
+      lhs_char = STRING_CHAR_AND_LENGTH (lhs, lhs_limit - lhs, lhs_len);
+      rhs_char = STRING_CHAR_AND_LENGTH (rhs, rhs_limit - rhs, rhs_len);
+
+      if (RE_TRANSLATE (translate, lhs_char)
+	  != RE_TRANSLATE (translate, rhs_char))
+	return 1;
+
+      lhs += lhs_len;
+      rhs += rhs_len;
+    }
+
+  /* The regions are equal only if both were consumed completely.  */
+  return (lhs == lhs_limit && rhs == rhs_limit) ? 0 : 1;
+}
+
+/* Entry points for GNU code.  */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+   compiles PATTERN (of length SIZE) and puts the result in BUFP.
+   Returns 0 if the pattern was valid, otherwise an error string.
+
+   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+   are set in BUFP on entry.
+
+   We call regex_compile to do the actual compilation.  */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+     const char *pattern;
+     int length;
+     struct re_pattern_buffer *bufp;
+{
+  reg_errcode_t status;
+
+  /* Fields fixed for the GNU interface before compiling:
+     - regs_allocated: GNU code is written to assume at least RE_NREGS
+       registers will be set (and at least one extra will be -1);
+     - no_sub: GNU callers ask for register information by passing a
+       non-null REGS argument to re_match etc., not through this flag;
+     - newline_anchor: anchors match at newline.  */
+  bufp->regs_allocated = REGS_UNALLOCATED;
+  bufp->no_sub = 0;
+  bufp->newline_anchor = 1;
+
+  status = regex_compile (pattern, length, re_syntax_options, bufp);
+
+  /* A zero status means the pattern was valid; anything else is turned
+     into its message string.  */
+  if (status)
+    return gettext (re_error_msgid[(int) status]);
+  return NULL;
+}
+
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
+
+#if defined (_REGEX_RE_COMP) || defined (_LIBC)
+
+/* BSD has one and only one pattern buffer.  */
+static struct re_pattern_buffer re_comp_buf;
+
+char *
+#ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+   these names if they don't use our functions, and still use
+   regcomp/regexec below without link errors.  */
+weak_function
+#endif
+re_comp (s)
+    const char *s;
+{
+  reg_errcode_t status;
+
+  /* A null S compiles nothing: it only reports whether a pattern has
+     been compiled into the shared buffer before.  */
+  if (s == NULL)
+    {
+      if (re_comp_buf.buffer != NULL)
+	return 0;
+      return gettext ("No previous regular expression");
+    }
+
+  /* First use: give the shared buffer its initial pattern storage and
+     a fastmap.  */
+  if (re_comp_buf.buffer == NULL)
+    {
+      re_comp_buf.buffer = (unsigned char *) malloc (200);
+      if (!re_comp_buf.buffer)
+        /* CVS: Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
+      re_comp_buf.allocated = 200;
+
+      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
+      if (!re_comp_buf.fastmap)
+	/* CVS: Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
+    }
+
+  /* Since `re_exec' always passes NULL for the `regs' argument, we
+     don't need to initialize the pattern buffer fields which affect it.  */
+
+  /* Match anchors at newlines.  */
+  re_comp_buf.newline_anchor = 1;
+
+  status = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
+
+  if (status)
+    /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+    return (char *) gettext (re_error_msgid[(int) status]);
+
+  return NULL;
+}
+
+
+int
+#ifdef _LIBC
+weak_function
+#endif
+re_exec (s)
+    const char *s;
+{
+  /* Search all of S with the pattern held in the shared BSD buffer,
+     asking for no register information.  */
+  const int length = strlen (s);
+  int where = re_search (&re_comp_buf, s, length, 0, length,
+			 (struct re_registers *) 0);
+
+  /* Nonzero exactly when re_search returned a non-negative value.  */
+  return where >= 0;
+}
+#endif /* _REGEX_RE_COMP */
+
+/* POSIX.2 functions.  Don't define these for Emacs.  */
+
+#ifndef emacs
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+   PREG is a regex_t *.  We do not expect any fields to be initialized,
+   since POSIX says we shouldn't.  Thus, we set
+
+     `buffer' to the compiled pattern;
+     `used' to the length of the compiled pattern;
+     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+       REG_EXTENDED bit in CFLAGS is set; otherwise, to
+       RE_SYNTAX_POSIX_BASIC;
+     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+     `fastmap' and `fastmap_accurate' to zero;
+     `re_nsub' to the number of subexpressions in PATTERN.
+
+   PATTERN is the address of the pattern string.
+
+   CFLAGS is a series of bits which affect compilation.
+
+     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+     use POSIX basic syntax.
+
+     If REG_NEWLINE is set, then . and [^...] don't match newline.
+     Also, regexec will try a match beginning after every newline.
+
+     If REG_ICASE is set, then we considers upper- and lowercase
+     versions of letters to be equivalent when matching.
+
+     If REG_NOSUB is set, then when PREG is passed to regexec, that
+     routine will report only success or failure, and nothing about the
+     registers.
+
+   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
+   the return codes and their meanings.)  */
+
+int
+regcomp (preg, pattern, cflags)
+    regex_t *preg;
+    const char *pattern;
+    int cflags;
+{
+  reg_errcode_t status;
+  unsigned syntax;
+
+  /* REG_EXTENDED chooses between the two POSIX syntaxes.  */
+  if (cflags & REG_EXTENDED)
+    syntax = RE_SYNTAX_POSIX_EXTENDED;
+  else
+    syntax = RE_SYNTAX_POSIX_BASIC;
+
+  /* regex_compile will allocate the space for the compiled pattern.  */
+  preg->buffer = 0;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  /* Don't bother to use a fastmap when searching.  This simplifies the
+     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
+     characters after newlines into the fastmap.  This way, we just try
+     every character.  */
+  preg->fastmap = 0;
+
+  if (!(cflags & REG_ICASE))
+    preg->translate = NULL;
+  else
+    {
+      unsigned ch;
+
+      preg->translate
+	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
+				      * sizeof (*(RE_TRANSLATE_TYPE)0));
+      if (preg->translate == NULL)
+        return (int) REG_ESPACE;
+
+      /* Build the case-folding table: uppercase characters map to their
+         lowercase counterparts, everything else maps to itself.  */
+      for (ch = 0; ch < CHAR_SET_SIZE; ch++)
+        {
+          if (ISUPPER (ch))
+            preg->translate[ch] = tolower (ch);
+          else
+            preg->translate[ch] = ch;
+        }
+    }
+
+  /* If REG_NEWLINE is set, newlines are treated differently:
+     neither . nor [^...] matches newline, and the matching behavior
+     changes as well (newline_anchor).  */
+  if (cflags & REG_NEWLINE)
+    {
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      preg->newline_anchor = 1;
+    }
+  else
+    preg->newline_anchor = 0;
+
+  preg->no_sub = (cflags & REG_NOSUB) != 0;
+
+  /* POSIX says a null character in the pattern terminates it, so we
+     can use strlen here in compiling the pattern.  */
+  status = regex_compile (pattern, strlen (pattern), syntax, preg);
+
+  /* POSIX doesn't distinguish between an unmatched open-group and an
+     unmatched close-group: both are REG_EPAREN.  */
+  if (status == REG_ERPAREN)
+    status = REG_EPAREN;
+
+  return (int) status;
+}
+
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end.
+
+   We return 0 if we find a match and REG_NOMATCH if not.  REG_NOMATCH
+   is also returned when the temporary register arrays cannot be
+   allocated; nothing stays allocated in that case.  */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+    const regex_t *preg;
+    const char *string;
+    size_t nmatch;
+    regmatch_t pmatch[];
+    int eflags;
+{
+  int ret;
+  struct re_registers regs;
+  regex_t private_preg;
+  int len = strlen (string);
+  boolean want_reg_info = !preg->no_sub && nmatch > 0;
+
+  /* PREG is const, so the per-call settings below are made on a
+     private copy of the pattern buffer.  */
+  private_preg = *preg;
+
+  private_preg.not_bol = !!(eflags & REG_NOTBOL);
+  private_preg.not_eol = !!(eflags & REG_NOTEOL);
+
+  /* The user has told us exactly how many registers to return
+     information about, via `nmatch'.  We have to pass that on to the
+     matching routines.  */
+  private_preg.regs_allocated = REGS_FIXED;
+
+  if (want_reg_info)
+    {
+      regs.num_regs = nmatch;
+      regs.start = TALLOC (nmatch, regoff_t);
+      regs.end = TALLOC (nmatch, regoff_t);
+      if (regs.start == NULL || regs.end == NULL)
+        {
+          /* One of the two arrays may have been obtained even though
+             the other was not; release it so this early return does
+             not leak.  */
+          if (regs.start != NULL)
+            free (regs.start);
+          if (regs.end != NULL)
+            free (regs.end);
+          return (int) REG_NOMATCH;
+        }
+    }
+
+  /* Perform the searching operation.  */
+  ret = re_search (&private_preg, string, len,
+                   /* start: */ 0, /* range: */ len,
+                   want_reg_info ? &regs : (struct re_registers *) 0);
+
+  /* Copy the register information to the POSIX structure.  */
+  if (want_reg_info)
+    {
+      /* PMATCH is written only on a successful search.  */
+      if (ret >= 0)
+        {
+          unsigned r;
+
+          for (r = 0; r < nmatch; r++)
+            {
+              pmatch[r].rm_so = regs.start[r];
+              pmatch[r].rm_eo = regs.end[r];
+            }
+        }
+
+      /* If we needed the temporary register info, free the space now.  */
+      free (regs.start);
+      free (regs.end);
+    }
+
+  /* We want zero return to mean success, unlike `re_search'.  */
+  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+}
+
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+   from either regcomp or regexec.   We don't use PREG here.  */
+
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+    int errcode;
+    const regex_t *preg;
+    char *errbuf;
+    size_t errbuf_size;
+{
+  const char *text;
+  size_t needed;
+
+  /* Only error codes returned by the rest of the code should be passed
+     to this routine.  If we are given anything else, or if other regex
+     code generates an invalid error code, then the program has a bug.
+     Dump core so we can fix it.  */
+  if (errcode < 0
+      || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
+    abort ();
+
+  text = gettext (re_error_msgid[errcode]);
+
+  /* Space the full message requires, terminating null included.  This
+     is the return value whether or not the message fits.  */
+  needed = strlen (text) + 1;
+
+  /* A zero-sized buffer is never written to.  */
+  if (errbuf_size == 0)
+    return needed;
+
+  if (needed <= errbuf_size)
+    strcpy (errbuf, text);
+  else
+    {
+      /* Message too long: copy what fits and terminate it by hand.  */
+      strncpy (errbuf, text, errbuf_size - 1);
+      errbuf[errbuf_size - 1] = 0;
+    }
+
+  return needed;
+}
+
+
+/* Free dynamically allocated space used by PREG.  */
+
+void
+regfree (preg)
+    regex_t *preg;
+{
+  /* Release each owned allocation that is present, clearing its
+     pointer so a second regfree on the same PREG frees nothing.  */
+  if (preg->buffer)
+    {
+      free (preg->buffer);
+      preg->buffer = NULL;
+    }
+
+  if (preg->fastmap)
+    {
+      free (preg->fastmap);
+      preg->fastmap = NULL;
+    }
+
+  if (preg->translate)
+    {
+      free (preg->translate);
+      preg->translate = NULL;
+    }
+
+  /* Reset the bookkeeping that described the storage just released.  */
+  preg->allocated = 0;
+  preg->used = 0;
+  preg->fastmap_accurate = 0;
+}
+
+#endif /* not emacs  */

+ 510 - 0
sys/src/ape/cmd/diff/regex.h

@@ -0,0 +1,510 @@
+/* Definitions for data structures and routines for the regular
+   expression library, version 0.12.
+
+   Copyright (C) 1985, 89, 90, 91, 92, 93, 95 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+   USA.  */
+
+#ifndef __REGEXP_LIBRARY_H__
+#define __REGEXP_LIBRARY_H__
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+   <regex.h>.  */
+
+#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS)
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+   should be there.  */
+#include <stddef.h>
+#endif
+
+
+/* The following bits are used to determine the regexp syntax we
+   recognize.  The set/not-set meanings are chosen so that Emacs syntax
+   remains the value 0.  The bits are given in alphabetical order, and
+   the definitions shifted by one from the previous bit; thus, when we
+   add or remove a bit, only one other definition need change.  */
+typedef unsigned reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+   If set, then such a \ quotes the following character.  */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+     literals. 
+   If set, then \+ and \? are operators and + and ? are literals.  */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported.  They are:
+     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
+     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+   If not set, then character classes are not supported.  */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+     expressions, of course).
+   If this bit is not set, then it depends:
+        ^  is an anchor if it is at the beginning of a regular
+           expression or after an open-group or an alternation operator;
+        $  is an anchor if it is at the end of a regular expression, or
+           before a close-group or an alternation operator.  
+
+   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+   POSIX draft 11.2 says that * etc. in leading positions is undefined.
+   We already implemented a previous draft which made those constructs
+   invalid, though, so we haven't changed the code back.  */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+     regardless of where they are in the pattern.
+   If this bit is not set, then special characters are special only in
+     some contexts; otherwise they are ordinary.  Specifically, 
+     * + ? and intervals are only special when not after the beginning,
+     open-group, or alternation operator.  */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+     immediately after an alternation or begin-group operator.  */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+   If not set, then it doesn't.  */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+   If not set, then it does.  */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+   If not set, they do.  */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+     interval, depending on RE_NO_BK_BRACES. 
+   If not set, \{, \}, {, and } are literals.  */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+   If not set, they are.  */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+   If not set, newline is literal.  */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+     are literals.
+  If not set, then `\{...\}' defines an interval.  */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+   If not set, \(...\) defines a group, and ( and ) are literals.  */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+   If not set, then \<digit> is a back-reference.  */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal. 
+   If not set, then \| is an alternation operator, and | is literal.  */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then a range whose starting point collates
+     higher than its ending point, as in [z-a], is invalid.
+   If not set, then such a range is ignored.  */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+   If not set, then an unmatched ) is invalid.  */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+   without further backtracking.  */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+   some interfaces).  When a regexp is compiled, the syntax used is
+   stored in the pattern buffer, so changing this does not affect
+   already-compiled regexps.  */
+extern reg_syntax_t re_syntax_options;
+
+#ifdef emacs
+/* In Emacs, this is the string or buffer in which we
+   are matching.  It is used for looking up syntax properties.  */
+extern Lisp_Object re_match_object;
+#endif
+
+
+/* Define combinations of the above bits for the standard possibilities.
+   (The [[[ comments delimit what gets put into the Texinfo file, so
+   don't delete them!)  */ 
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK							\
+  (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL			\
+   | RE_NO_BK_PARENS            | RE_NO_BK_REFS				\
+   | RE_NO_BK_VBAR               | RE_NO_EMPTY_RANGES			\
+   | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+#define RE_SYNTAX_POSIX_AWK 						\
+  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+#define RE_SYNTAX_GREP							\
+  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
+   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
+   | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP							\
+  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
+   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
+   | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP						\
+  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax.  */
+#define _RE_SYNTAX_POSIX_COMMON						\
+  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
+   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC						\
+  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
+   isn't minimal, since other operators, such as \`, aren't disabled.  */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
+  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED					\
+  (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS  | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS       | RE_NO_BK_VBAR				\
+   | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+   replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added.  */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
+   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow.  Some systems
+   (erroneously) define this in other header files, but we want our
+   value, so remove any previous define.  */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+#define RE_DUP_MAX ((1 << 15) - 1) 
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp').  */
+
+/* If this bit is set, then use extended regular expression syntax.
+   If not set, then use basic regular expression syntax.  */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define REG_ICASE (REG_EXTENDED << 1)
+ 
+/* If this bit is set, then anchors do match at newline
+     characters in the string, and neither . nor [^...] matches
+     a newline.
+   If not set, then anchors do not match at newlines.  */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then regexec reports only success or failure
+     and ignores its PMATCH argument.
+   If not set, then regexec also fills in PMATCH when NMATCH is
+     nonzero.  */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec).  */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+     the beginning of the string (presumably because it's not the
+     beginning of a line).
+   If not set, then the beginning-of-line operator does match the
+     beginning of the string.  */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line.  */
+#define REG_NOTEOL (1 << 1)
+
+
+/* If any error codes are removed, changed, or added, update the
+   `re_error_msgid' table in regex.c, which is indexed by these
+   values.  */
+typedef enum
+{
+  REG_NOERROR = 0,	/* Success.  */
+  REG_NOMATCH,		/* Didn't find a match (for regexec).  */
+
+  /* POSIX regcomp return error codes.  (In the order listed in the
+     standard.)  */
+  REG_BADPAT,		/* Invalid pattern.  */
+  REG_ECOLLATE,		/* Not implemented.  */
+  REG_ECTYPE,		/* Invalid character class name.  */
+  REG_EESCAPE,		/* Trailing backslash.  */
+  REG_ESUBREG,		/* Invalid back reference.  */
+  REG_EBRACK,		/* Unmatched left bracket.  */
+  REG_EPAREN,		/* Parenthesis imbalance.  */ 
+  REG_EBRACE,		/* Unmatched \{.  */
+  REG_BADBR,		/* Invalid contents of \{\}.  */
+  REG_ERANGE,		/* Invalid range end.  */
+  REG_ESPACE,		/* Ran out of memory.  */
+  REG_BADRPT,		/* No preceding re for repetition op.  */
+
+  /* Error codes we've added.  */
+  REG_EEND,		/* Premature end.  */
+  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
+  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp
+			   (regcomp reports it as REG_EPAREN).  */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern.  Before calling
+   the pattern compiler, the fields `buffer', `allocated', `fastmap',
+   `translate', and `no_sub' can be set.  After the pattern has been
+   compiled, the `re_nsub' field is available.  All other fields are
+   private to the regex routines.  */
+
+#ifndef RE_TRANSLATE_TYPE 
+#define RE_TRANSLATE_TYPE char *
+#define RE_TRANSLATE(TBL, C) ((TBL)[C])
+#define RE_TRANSLATE_P(TBL) (TBL)
+#endif
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+	/* Space that holds the compiled pattern.  It is declared as
+          `unsigned char *' because its elements are
+           sometimes used as array indexes.  */
+  unsigned char *buffer;
+
+	/* Number of bytes to which `buffer' points.  */
+  unsigned long allocated;
+
+	/* Number of bytes actually used in `buffer'.  */
+  unsigned long used;	
+
+        /* Syntax setting with which the pattern was compiled.  */
+  reg_syntax_t syntax;
+
+        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
+           the fastmap, if there is one, to skip over impossible
+           starting points for matches.  */
+  char *fastmap;
+
+        /* Either a translate table to apply to all characters before
+           comparing them, or zero for no translation.  The translation
+           is applied to a pattern when it is compiled and to a string
+           when it is matched.  */
+  RE_TRANSLATE_TYPE translate;
+
+	/* Number of subexpressions found by the compiler.  */
+  size_t re_nsub;
+
+        /* Zero if this pattern cannot match the empty string, one else.
+           Well, in truth it's used only in `re_search_2', to see
+           whether or not we should use the fastmap, so we don't set
+           this absolutely perfectly; see `re_compile_fastmap' (the
+           `duplicate' case).  */
+  unsigned can_be_null : 1;
+
+        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+             for `max (RE_NREGS, re_nsub + 1)' groups.
+           If REGS_REALLOCATE, reallocate space if necessary.
+           If REGS_FIXED, use what's there.  */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+  unsigned regs_allocated : 2;
+
+        /* Set to zero when `regex_compile' compiles a pattern; set to one
+           by `re_compile_fastmap' if it updates the fastmap.  */
+  unsigned fastmap_accurate : 1;
+
+        /* If set, `re_match_2' does not return information about
+           subexpressions.  */
+  unsigned no_sub : 1;
+
+        /* If set, a beginning-of-line anchor doesn't match at the
+           beginning of the string.  */ 
+  unsigned not_bol : 1;
+
+        /* Similarly for an end-of-line anchor.  */
+  unsigned not_eol : 1;
+
+        /* If true, an anchor at a newline matches.  */
+  unsigned newline_anchor : 1;
+
+  /* If true, multi-byte form in the `buffer' should be recognized as a
+     multibyte character. */
+  unsigned multibyte : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* Type for byte offsets within the string.  POSIX mandates this.  */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in.  See
+   regex.texinfo for a full description of what registers match.  */
+struct re_registers
+{
+  /* Number of registers; per the `re_set_registers' description the
+     `start' and `end' arrays each hold at least this many entries.  */
+  unsigned num_regs;
+  /* Array of per-register start offsets.  */
+  regoff_t *start;
+  /* Array of per-register end offsets.  */
+  regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+   `re_match_2' returns information about at least this many registers
+   the first time a `regs' structure is passed.  */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers.  Aside from the different names than
+   `re_registers', POSIX uses an array of structures, instead of a
+   structure of arrays.  */
+typedef struct
+{
+  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
+  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
+} regmatch_t;
+
+/* Declarations for routines.  */
+
+/* To avoid duplicating every routine declaration -- once with a
+   prototype (if we are ANSI), and once without (if we aren't) -- we
+   use the following macro to declare argument types.  This
+   unfortunately clutters up the declarations a bit, but I think it's
+   worth it.  */
+
+#if __STDC__
+
+#define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+#define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+   You can also simply assign to the `re_syntax_options' variable.  */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+   and syntax given by the global `re_syntax_options', into the buffer
+   BUFFER.  Return NULL if successful, and an error string if not.  */
+extern const char *re_compile_pattern
+  _RE_ARGS ((const char *pattern, int length,
+             struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+   accelerate searches.  Return 0 if successful and -2 if there was an
+   internal error.  */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+   compiled into BUFFER.  Start searching at position START, for RANGE
+   characters.  Return the starting position of the match, -1 for no
+   match, or -2 for an internal error.  Also return register
+   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
+extern int re_search
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+            int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+   STRING2.  Also, stop searching at index START + STOP.  */
+extern int re_search_2
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+             int length1, const char *string2, int length2,
+             int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+   in BUFFER matched, starting at position START.  */
+extern int re_match
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+             int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
+extern int re_match_2 
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+             int length1, const char *string2, int length2,
+             int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
+   for recording register information.  STARTS and ENDS must be
+   allocated with malloc, and must each be at least `NUM_REGS * sizeof
+   (regoff_t)' bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+extern void re_set_registers
+  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+             unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+#ifdef _REGEX_RE_COMP
+/* 4.2 bsd compatibility.  */
+/* CVS: don't use prototypes: they may conflict with system headers.  */
+extern char *re_comp _RE_ARGS (());
+extern int re_exec _RE_ARGS (());
+#endif
+
+/* POSIX compatibility.  */
+extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+extern int regexec
+  _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+             regmatch_t pmatch[], int eflags));
+extern size_t regerror
+  _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+             size_t errbuf_size));
+extern void regfree _RE_ARGS ((regex_t *preg));
+
+#endif /* not __REGEXP_LIBRARY_H__ */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/

+ 1109 - 0
sys/src/ape/cmd/diff/sdiff.c

@@ -0,0 +1,1109 @@
+/* SDIFF -- interactive merge front end to diff
+   Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU DIFF; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/* GNU SDIFF was written by Thomas Lord. */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/contrib/diff/sdiff.c,v 1.1.1.1.12.1 2002/01/28 01:26:35 nectar Exp $");
+
+#include "system.h"
+#include <stdio.h>
+#include <signal.h>
+#include "getopt.h"
+
+/* Size of chunks read from files which must be parsed into lines. */
+#define SDIFF_BUFSIZE ((size_t) 65536)
+
+/* Default name of the diff program */
+#ifndef DIFF_PROGRAM
+#define DIFF_PROGRAM "/usr/bin/diff"
+#endif
+
+/* Users' editor of nonchoice */
+#ifndef DEFAULT_EDITOR_PROGRAM
+#define DEFAULT_EDITOR_PROGRAM "ed"
+#endif
+
+extern char version_string[];
+static char const *program_name;
+static char const *diffbin = DIFF_PROGRAM;
+static char const *edbin = DEFAULT_EDITOR_PROGRAM;
+static char const **diffargv;
+
+static char *tmpname;
+static int volatile tmpmade;
+
+#if HAVE_FORK
+static pid_t volatile diffpid;
+#endif
+
+struct line_filter;
+
+static FILE *ck_fopen PARAMS((char const *, char const *));
+static RETSIGTYPE catchsig PARAMS((int));
+static VOID *xmalloc PARAMS((size_t));
+static char const *expand_name PARAMS((char *, int, char const *));
+static int edit PARAMS((struct line_filter *, int, struct line_filter *, int, FILE*));
+static int interact PARAMS((struct line_filter *, struct line_filter *, struct line_filter *, FILE*));
+static int lf_snarf PARAMS((struct line_filter *, char *, size_t));
+static int skip_white PARAMS((void));
+static size_t ck_fread PARAMS((char *, size_t, FILE *));
+static size_t lf_refill PARAMS((struct line_filter *));
+static void checksigs PARAMS((void));
+static void ck_fclose PARAMS((FILE *));
+static void ck_fflush PARAMS((FILE *));
+static void ck_fwrite PARAMS((char const *, size_t, FILE *));
+static void cleanup PARAMS((void));
+static void diffarg PARAMS((char const *));
+static void execdiff PARAMS((void));
+static void exiterr PARAMS((void));
+static void fatal PARAMS((char const *));
+static void flush_line PARAMS((void));
+static void give_help PARAMS((void));
+static void lf_copy PARAMS((struct line_filter *, int, FILE *));
+static void lf_init PARAMS((struct line_filter *, FILE *));
+static void lf_skip PARAMS((struct line_filter *, int));
+static void perror_fatal PARAMS((char const *));
+static void trapsigs PARAMS((void));
+static void try_help PARAMS((char const *));
+static void untrapsig PARAMS((int));
+static void usage PARAMS((void));
+
+static int diraccess PARAMS((char const *));
+
+/* Options: */
+
+/* name of output file if -o spec'd */
+static char *out_file;
+
+/* do not print common lines if true, set by -s option */
+static int suppress_common_flag;
+
+/* Long options accepted by sdiff, for getopt_long.  The last field of
+   each entry is the option code handled by the switch in main; `--help'
+   has no short form and uses the code 129.  The all-zero entry
+   terminates the table.  */
+static struct option const longopts[] =
+{
+  {"ignore-blank-lines", 0, 0, 'B'},
+  {"speed-large-files", 0, 0, 'H'},
+  {"ignore-matching-lines", 1, 0, 'I'},
+  {"ignore-all-space", 0, 0, 'W'}, /* swap W and w for historical reasons */
+  {"text", 0, 0, 'a'},
+  {"ignore-space-change", 0, 0, 'b'},
+  {"minimal", 0, 0, 'd'},
+  {"ignore-case", 0, 0, 'i'},
+  {"left-column", 0, 0, 'l'},
+  {"output", 1, 0, 'o'},
+  {"suppress-common-lines", 0, 0, 's'},
+  {"expand-tabs", 0, 0, 't'},
+  {"width", 1, 0, 'w'},
+  {"version", 0, 0, 'v'},
+  {"help", 0, 0, 129},
+  {0, 0, 0, 0}
+};
+
+static void
+try_help (reason)
+     char const *reason;
+{
+  if (reason)
+    fprintf (stderr, "%s: %s\n", program_name, reason);
+  fprintf (stderr, "%s: Try `%s --help' for more information.\n",
+	   program_name, program_name);
+  exit (2);
+}
+
+/* Print the full option summary on stdout.  The text is emitted as a
+   sequence of separate string literals, one group of options each.  */
+static void
+usage ()
+{
+  printf ("Usage: %s [OPTIONS]... FILE1 FILE2\n\n", program_name);
+
+  /* Interactive mode.  */
+  fputs ("  -o FILE  --output=FILE  Operate interactively, sending output to FILE.\n\n",
+         stdout);
+
+  /* Options controlling what counts as a difference.  */
+  fputs ("  -i  --ignore-case  Consider upper- and lower-case to be the same.\n"
+         "  -W  --ignore-all-space  Ignore all white space.\n"
+         "  -b  --ignore-space-change  Ignore changes in the amount of white space.\n"
+         "  -B  --ignore-blank-lines  Ignore changes whose lines are all blank.\n"
+         "  -I RE  --ignore-matching-lines=RE  Ignore changes whose lines all match RE.\n"
+         "  -a  --text  Treat all files as text.\n\n",
+         stdout);
+
+  /* Layout options.  */
+  fputs ("  -w NUM  --width=NUM  Output at most NUM (default 130) characters per line.\n"
+         "  -l  --left-column  Output only the left column of common lines.\n"
+         "  -s  --suppress-common-lines  Do not output common lines.\n\n",
+         stdout);
+  fputs ("  -t  --expand-tabs  Expand tabs to spaces in output.\n\n", stdout);
+
+  /* Algorithm tuning.  */
+  fputs ("  -d  --minimal  Try hard to find a smaller set of changes.\n"
+         "  -H  --speed-large-files  Assume large files and many scattered small changes.\n\n",
+         stdout);
+
+  /* Informational options and the closing note.  */
+  fputs ("  -v  --version  Output version info.\n"
+         "  --help  Output this help.\n\n"
+         "If FILE1 or FILE2 is `-', read standard input.\n",
+         stdout);
+}
+
+/* Release what sdiff holds outside its own process: send SIGPIPE to
+   the subsidiary diff if one is recorded in diffpid, and remove the
+   temporary file if tmpmade says one exists.  */
+static void
+cleanup ()
+{
+#if HAVE_FORK
+  if (diffpid > 0)
+    kill (diffpid, SIGPIPE);
+#endif
+
+  if (tmpmade != 0)
+    unlink (tmpname);
+}
+
+/* Common failure exit: clean up, restore the original signal
+   dispositions, act on any signal that arrived meanwhile, and
+   otherwise exit with status 2.  */
+static void
+exiterr ()
+{
+  cleanup ();
+  untrapsig (0);
+  /* checksigs does not return if a signal was recorded.  */
+  checksigs ();
+  exit (2);
+}
+
+/* Print "PROGRAM: MSG" on stderr, then leave through exiterr.  */
+static void
+fatal (msg)
+     char const *msg;
+{
+  fprintf (stderr, "%s: %s\n", program_name, msg);
+  exiterr ();
+}
+
+/* Like fatal, but describe the current errno with perror, using MSG
+   as the perror prefix.  */
+static void
+perror_fatal (msg)
+     char const *msg;
+{
+  /* Save errno first: the calls below may overwrite it.  */
+  int e = errno;
+  checksigs ();
+  fprintf (stderr, "%s: ", program_name);
+  /* Restore the saved value so perror reports the original failure.  */
+  errno = e;
+  perror (msg);
+  exiterr ();
+}
+
+
+/* Allocate SIZE bytes with malloc.  A null result from malloc is
+   reported as "memory exhausted" through fatal.  */
+static VOID *
+xmalloc (size)
+     size_t size;
+{
+  VOID *block;
+
+  block = (VOID *) malloc (size);
+  if (block == 0)
+    fatal ("memory exhausted");
+
+  return block;
+}
+
+/* Open FNAME with fopen mode TYPE.  If fopen fails, report FNAME
+   through perror_fatal.  */
+static FILE *
+ck_fopen (fname, type)
+     char const *fname, *type;
+{
+  FILE *stream;
+
+  stream = fopen (fname, type);
+  if (stream == 0)
+    perror_fatal (fname);
+
+  return stream;
+}
+
+/* Close F; a nonzero result from fclose is treated as fatal.  */
+static void
+ck_fclose (f)
+     FILE *f;
+{
+  int failed = fclose (f);
+
+  if (failed != 0)
+    perror_fatal ("input/output error");
+}
+
+/* Read up to SIZE bytes from F into BUF and return the count read.
+   A zero-length read with the stream's error flag set is fatal.  */
+static size_t
+ck_fread (buf, size, f)
+     char *buf;
+     size_t size;
+     FILE *f;
+{
+  size_t got = fread (buf, sizeof (char), size, f);
+
+  if (got != 0)
+    return got;
+
+  /* Nothing was read: tell end of file apart from a read error.  */
+  if (ferror (f))
+    perror_fatal ("input error");
+  return 0;
+}
+
+/* Write the SIZE bytes at BUF to F; a short write is fatal.  */
+static void
+ck_fwrite (buf, size, f)
+     char const *buf;
+     size_t size;
+     FILE *f;
+{
+  size_t put = fwrite (buf, sizeof (char), size, f);
+
+  if (put != size)
+    perror_fatal ("output error");
+}
+
+/* Flush F; a nonzero result from fflush is fatal.  */
+static void
+ck_fflush (f)
+     FILE *f;
+{
+  if (fflush (f))
+    perror_fatal ("output error");
+}
+
+/* Return the file name to open for operand NAME.
+   "-" (standard input) is rejected with a fatal error.  If IS_DIR is
+   zero, NAME itself is returned.  Otherwise the result is a newly
+   allocated "NAME/BASE", where BASE is the part of OTHER_NAME after
+   the position reported by filename_lastdirchar (or all of
+   OTHER_NAME when that is null).  */
+static char const *
+expand_name (name, is_dir, other_name)
+     char *name;
+     int is_dir;
+     char const *other_name;
+{
+  char const *lastdir;
+  char const *base;
+  size_t namelen;
+  size_t baselen;
+  char *joined;
+
+  if (strcmp (name, "-") == 0)
+    fatal ("cannot interactively merge standard input");
+
+  if (!is_dir)
+    return name;
+
+  lastdir = filename_lastdirchar (other_name);
+  base = lastdir ? lastdir + 1 : other_name;
+
+  namelen = strlen (name);
+  baselen = strlen (base);
+
+  /* Room for NAME, the '/', BASE and the terminating NUL.  */
+  joined = xmalloc (namelen + baselen + 2);
+  memcpy (joined, name, namelen);
+  joined[namelen] = '/';
+  /* Copying baselen + 1 bytes brings BASE's NUL along.  */
+  memcpy (joined + namelen + 1, base, baselen + 1);
+  return joined;
+}
+
+
+
+/* Buffered reader over one input stream, used by the lf_* routines.  */
+struct line_filter {
+  /* Stream that lf_refill reads from.  */
+  FILE *infile;
+  /* Current read position inside the buffer.  */
+  char *bufpos;
+  /* Start of the buffer (SDIFF_BUFSIZE + 1 bytes, see lf_init).  */
+  char *buffer;
+  /* End of the valid data; lf_init and lf_refill store a '\n' here.  */
+  char *buflim;
+};
+
+/* Set up LF to read from INFILE.  The buffer starts out empty, with
+   position and limit both at its first byte, which holds the '\n'
+   end marker.  */
+static void
+lf_init (lf, infile)
+     struct line_filter *lf;
+     FILE *infile;
+{
+  char *storage;
+
+  lf->infile = infile;
+
+  /* One byte beyond SDIFF_BUFSIZE leaves room for the end marker.  */
+  storage = xmalloc (SDIFF_BUFSIZE + 1);
+  storage[0] = '\n';
+
+  lf->buffer = storage;
+  lf->bufpos = storage;
+  lf->buflim = storage;
+}
+
+/* Reload LF's buffer from its input stream, discarding whatever it
+   held.  The position returns to the start of the buffer and a '\n'
+   marker is stored just past the new data.  Returns the number of
+   bytes read (zero when nothing more could be read).  */
+static size_t
+lf_refill (lf)
+     struct line_filter *lf;
+{
+  size_t got = ck_fread (lf->buffer, SDIFF_BUFSIZE, lf->infile);
+  char *end = lf->buffer + got;
+
+  *end = '\n';
+  lf->buflim = end;
+  lf->bufpos = lf->buffer;
+
+  checksigs ();
+  return got;
+}
+
+/* Consume LINES lines from LF, writing every byte consumed to
+   OUTFILE.  Stops early, after writing what was buffered, if the
+   input runs out first.  */
+static void
+lf_copy (lf, lines, outfile)
+     struct line_filter *lf;
+     int lines;
+     FILE *outfile;
+{
+  /* First byte of the current buffer that has not been written yet.  */
+  char *pending = lf->bufpos;
+  int remaining = lines;
+
+  while (remaining != 0)
+    {
+      char *nl = (char *) memchr (lf->bufpos, '\n', lf->buflim - lf->bufpos);
+
+      lf->bufpos = nl;
+      if (nl)
+        {
+          /* Step over the newline; one more line is done.  */
+          lf->bufpos = nl + 1;
+          remaining--;
+          continue;
+        }
+
+      /* No newline left in the buffer: emit the tail and reload.  */
+      ck_fwrite (pending, lf->buflim - pending, outfile);
+      if (lf_refill (lf) == 0)
+        return;
+      pending = lf->bufpos;
+    }
+
+  ck_fwrite (pending, lf->bufpos - pending, outfile);
+}
+
+/* Consume LINES lines from LF and discard them.  Stops early if the
+   input runs out first.  */
+static void
+lf_skip (lf, lines)
+     struct line_filter *lf;
+     int lines;
+{
+  int remaining = lines;
+
+  while (remaining != 0)
+    {
+      char *nl = (char *) memchr (lf->bufpos, '\n', lf->buflim - lf->bufpos);
+
+      lf->bufpos = nl;
+      if (nl)
+        {
+          /* Step over the newline; one more line is done.  */
+          lf->bufpos = nl + 1;
+          remaining--;
+        }
+      else if (lf_refill (lf) == 0)
+        break;
+    }
+}
+
+/* Snarf a line into a buffer.  Return EOF if EOF, 0 if error, 1 if OK.  */
+static int
+lf_snarf (lf, buffer, bufsize)
+     struct line_filter *lf;
+     char *buffer;
+     size_t bufsize;
+{
+  char *start = lf->bufpos;
+
+  for (;;)
+    {
+      char *next = (char *) memchr (start, '\n', lf->buflim + 1 - start);
+      size_t s = next - start;
+      if (bufsize <= s)
+	return 0;
+      memcpy (buffer, start, s);
+      if (next < lf->buflim)
+	{
+	  buffer[s] = 0;
+	  lf->bufpos = next + 1;
+	  return 1;
+	}
+      if (! lf_refill (lf))
+	return s ? 0 : EOF;
+      buffer += s;
+      bufsize -= s;
+      start = next;
+    }
+}
+
+
+
+/* Entry point.  Parse sdiff's options, turning most of them into
+   arguments for the subsidiary diff (collected with diffarg).
+   Without -o, control passes to diff through execdiff.  With -o, diff
+   is run as a subprocess and its output drives an interactive merge
+   of the two inputs into the output file.  */
+int
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  int opt;
+  char *editor;
+  char *differ;
+
+  initialize_main (&argc, &argv);
+  program_name = argv[0];
+
+  /* The EDITOR and DIFF environment variables, when set, replace the
+     built-in program names.  */
+  editor = getenv ("EDITOR");
+  if (editor)
+    edbin = editor;
+  differ = getenv ("DIFF");
+  if (differ)
+    diffbin = differ;
+
+  /* First element of the argument vector built for diff.  */
+  diffarg ("diff");
+
+  /* parse command line args */
+  while ((opt = getopt_long (argc, argv, "abBdHiI:lo:stvw:W", longopts, 0))
+	 != EOF)
+    {
+      switch (opt)
+	{
+	case 'a':
+	  diffarg ("-a");
+	  break;
+
+	case 'b':
+	  diffarg ("-b");
+	  break;
+
+	case 'B':
+	  diffarg ("-B");
+	  break;
+
+	case 'd':
+	  diffarg ("-d");
+	  break;
+
+	case 'H':
+	  diffarg ("-H");
+	  break;
+
+	case 'i':
+	  diffarg ("-i");
+	  break;
+
+	case 'I':
+	  diffarg ("-I");
+	  diffarg (optarg);
+	  break;
+
+	case 'l':
+	  diffarg ("--left-column");
+	  break;
+
+	case 'o':
+	  out_file = optarg;
+	  break;
+
+	case 's':
+	  suppress_common_flag = 1;
+	  break;
+
+	case 't':
+	  diffarg ("-t");
+	  break;
+
+	case 'v':
+	  printf ("sdiff - GNU diffutils version %s\n", version_string);
+	  exit (0);
+
+	/* sdiff's -w NUM is passed to diff as -W NUM, and sdiff's -W as
+	   diff's -w (see the note in longopts).  */
+	case 'w':
+	  diffarg ("-W");
+	  diffarg (optarg);
+	  break;
+
+	case 'W':
+	  diffarg ("-w");
+	  break;
+
+	/* --help */
+	case 129:
+	  usage ();
+	  if (ferror (stdout) || fclose (stdout) != 0)
+	    fatal ("write error");
+	  exit (0);
+
+	default:
+	  try_help (0);
+	}
+    }
+
+  /* Exactly two file operands are required.  */
+  if (argc - optind != 2)
+    try_help (argc - optind < 2 ? "missing operand" : "extra operand");
+
+  if (! out_file)
+    {
+      /* easy case: diff does everything for us */
+      if (suppress_common_flag)
+	diffarg ("--suppress-common-lines");
+      diffarg ("-y");
+      diffarg ("--");
+      diffarg (argv[optind]);
+      diffarg (argv[optind + 1]);
+      diffarg (0);
+      execdiff ();
+    }
+  else
+    {
+      /* Interactive merge: read both inputs ourselves and let diff's
+	 --sdiff-merge-assist output say which lines to take.  */
+      FILE *left, *right, *out, *diffout;
+      int interact_ok;
+      struct line_filter lfilt;
+      struct line_filter rfilt;
+      struct line_filter diff_filt;
+      int leftdir = diraccess (argv[optind]);
+      int rightdir = diraccess (argv[optind + 1]);
+
+      if (leftdir && rightdir)
+	fatal ("both files to be compared are directories");
+
+      left = ck_fopen (expand_name (argv[optind], leftdir, argv[optind + 1]), "r");
+      ;
+      right = ck_fopen (expand_name (argv[optind + 1], rightdir, argv[optind]), "r");
+      out = ck_fopen (out_file, "w");
+
+      diffarg ("--sdiff-merge-assist");
+      diffarg ("--");
+      diffarg (argv[optind]);
+      diffarg (argv[optind + 1]);
+      diffarg (0);
+
+      trapsigs ();
+
+#if ! HAVE_FORK
+      {
+	/* No fork: join the quoted arguments into one command line and
+	   read diff's output through popen.  */
+	size_t cmdsize = 1;
+	char *p, *command;
+	int i;
+
+	for (i = 0;  diffargv[i];  i++)
+	  cmdsize += 4 * strlen (diffargv[i]) + 3;
+	command = p = xmalloc (cmdsize);
+	for (i = 0;  diffargv[i];  i++)
+	  {
+	    char const *a = diffargv[i];
+	    SYSTEM_QUOTE_ARG (p, a);
+	    *p++ = ' ';
+	  }
+	/* Replace the trailing space with the terminator.  */
+	p[-1] = '\0';
+	diffout = popen (command, "r");
+	if (!diffout)
+	  perror_fatal (command);
+	free (command);
+      }
+#else /* HAVE_FORK */
+      {
+	int diff_fds[2];
+
+	if (pipe (diff_fds) != 0)
+	  perror_fatal ("pipe");
+
+	diffpid = fork ();
+	if (diffpid < 0)
+	  perror_fatal ("fork failed");
+	if (!diffpid)
+	  {
+	    /* Child: connect stdout to the pipe's write end and become
+	       diff.  */
+	    signal (SIGINT, SIG_IGN);  /* in case user interrupts editor */
+	    signal (SIGPIPE, SIG_DFL);
+
+	    close (diff_fds[0]);
+	    if (diff_fds[1] != STDOUT_FILENO)
+	      {
+		dup2 (diff_fds[1], STDOUT_FILENO);
+		close (diff_fds[1]);
+	      }
+
+	    execdiff ();
+	  }
+
+	/* Parent: keep only the read end.  */
+	close (diff_fds[1]);
+	diffout = fdopen (diff_fds[0], "r");
+	if (!diffout)
+	  perror_fatal ("fdopen");
+      }
+#endif /* HAVE_FORK */
+
+      lf_init (&diff_filt, diffout);
+      lf_init (&lfilt, left);
+      lf_init (&rfilt, right);
+
+      interact_ok = interact (&diff_filt, &lfilt, &rfilt, out);
+
+      ck_fclose (left);
+      ck_fclose (right);
+      ck_fclose (out);
+
+      {
+	int wstatus;
+
+#if ! HAVE_FORK
+	wstatus = pclose (diffout);
+#else
+	ck_fclose (diffout);
+	/* Retry waitpid when it is interrupted by a signal.  */
+	while (waitpid (diffpid, &wstatus, 0) < 0)
+	  if (errno == EINTR)
+	    checksigs ();
+	  else
+	    perror_fatal ("wait failed");
+	/* The child is gone; cleanup must not signal it any more.  */
+	diffpid = 0;
+#endif
+
+	if (tmpmade)
+	  {
+	    unlink (tmpname);
+	    tmpmade = 0;
+	  }
+
+	if (! interact_ok)
+	  exiterr ();
+
+	/* Fail unless diff exited normally with a status below 2.  */
+	if (! (WIFEXITED (wstatus) && WEXITSTATUS (wstatus) < 2))
+	  fatal ("Subsidiary diff failed");
+
+	untrapsig (0);
+	checksigs ();
+	/* Pass diff's exit status on as our own.  */
+	exit (WEXITSTATUS (wstatus));
+      }
+    }
+  return 0;			/* Fool -Wall . . . */
+}
+
+/* Append A to diffargv, the argument vector built for the subsidiary
+   diff, growing the vector when it is full.  Callers append a null
+   pointer last to terminate the vector.  */
+static void
+diffarg (a)
+     char const *a;
+{
+  static unsigned diffargs, diffargsmax;
+
+  if (diffargs == diffargsmax)
+    {
+      if (diffargsmax == 0)
+        {
+          /* First call: a small placeholder allocation gives realloc
+             below a valid pointer to resize.  */
+          diffargv = (char const **) xmalloc (sizeof (char));
+          diffargsmax = 8;
+        }
+
+      /* Double the capacity.  */
+      diffargsmax *= 2;
+      diffargv = (char const **) realloc (diffargv,
+                                          diffargsmax * sizeof (char const *));
+      if (diffargv == 0)
+        fatal ("out of memory");
+    }
+
+  diffargv[diffargs] = a;
+  diffargs++;
+}
+
+/* Replace this process with the diff program, passing diffargv.
+   Control comes back only if execvp fails; in that case write
+   "PROGRAM: not found" to stderr with write and _exit with status 2.  */
+static void
+execdiff ()
+{
+  static char const not_found[] = ": not found\n";
+
+  execvp (diffbin, (char **) diffargv);
+
+  write (STDERR_FILENO, diffbin, strlen (diffbin));
+  /* sizeof counts the terminating NUL, which is not written.  */
+  write (STDERR_FILENO, not_found, sizeof (not_found) - 1);
+  _exit (2);
+}
+
+
+
+
+/* Signal handling */
+
+/* The signals handled by trapsigs and untrapsig.  Entries guarded by
+   #ifdef are present only where the signal is defined; SIGINT and
+   SIGPIPE are always present.  NUM_SIGS is the number of entries.  */
+#define NUM_SIGS (sizeof (sigs) / sizeof (*sigs))
+static int const sigs[] = {
+#ifdef SIGHUP
+       SIGHUP,
+#endif
+#ifdef SIGQUIT
+       SIGQUIT,
+#endif
+#ifdef SIGTERM
+       SIGTERM,
+#endif
+#ifdef SIGXCPU
+       SIGXCPU,
+#endif
+#ifdef SIGXFSZ
+       SIGXFSZ,
+#endif
+       SIGINT,
+       SIGPIPE
+};
+
+/* Prefer `sigaction' if it is available, since `signal' can lose signals.  */
+#if HAVE_SIGACTION
+static struct sigaction initial_action[NUM_SIGS];
+#define initial_handler(i) (initial_action[i].sa_handler)
+#else
+static RETSIGTYPE (*initial_action[NUM_SIGS]) ();
+#define initial_handler(i) (initial_action[i])
+#endif
+
+static int volatile ignore_SIGINT;
+static int volatile signal_received;
+static int sigs_trapped;
+
+/* Signal handler.  Record S in signal_received so that checksigs can
+   act on it later; a SIGINT is not recorded while ignore_SIGINT is
+   set.  Without sigaction, the signal is first set to be ignored.  */
+static RETSIGTYPE
+catchsig (s)
+     int s;
+{
+#if ! HAVE_SIGACTION
+  signal (s, SIG_IGN);
+#endif
+
+  if (s != SIGINT || ! ignore_SIGINT)
+    signal_received = s;
+}
+
+/* Install catchsig as the handler for every signal in sigs that was
+   not already being ignored, saving the previous dispositions in
+   initial_action so that untrapsig can restore them.  */
+static void
+trapsigs ()
+{
+  int i;
+
+#if HAVE_SIGACTION
+  struct sigaction catchaction;
+  bzero (&catchaction, sizeof (catchaction));
+  catchaction.sa_handler = catchsig;
+#ifdef SA_INTERRUPT
+  /* Non-Posix BSD-style systems like SunOS 4.1.x need this
+     so that `read' calls are interrupted properly.  */
+  catchaction.sa_flags = SA_INTERRUPT;
+#endif
+  /* Block every signal in sigs while the handler runs.  */
+  sigemptyset (&catchaction.sa_mask);
+  for (i = 0;  i < NUM_SIGS;  i++)
+    sigaddset (&catchaction.sa_mask, sigs[i]);
+  for (i = 0;  i < NUM_SIGS;  i++)
+    {
+      /* Query the current disposition without changing it.  */
+      sigaction (sigs[i], 0, &initial_action[i]);
+      if (initial_handler (i) != SIG_IGN
+	  && sigaction (sigs[i], &catchaction, 0) != 0)
+	fatal ("signal error");
+    }
+#else /* ! HAVE_SIGACTION */
+  for (i = 0;  i < NUM_SIGS;  i++)
+    {
+      /* signal returns the previous handler, so set SIG_IGN to learn
+	 it, then install catchsig unless it was already SIG_IGN.  */
+      initial_action[i] = signal (sigs[i], SIG_IGN);
+      if (initial_handler (i) != SIG_IGN
+	  && signal (sigs[i], catchsig) != SIG_IGN)
+	fatal ("signal error");
+    }
+#endif /* ! HAVE_SIGACTION */
+
+#if !defined(SIGCHLD) && defined(SIGCLD)
+#define SIGCHLD SIGCLD
+#endif
+#ifdef SIGCHLD
+  /* System V fork+wait does not work if SIGCHLD is ignored.  */
+  signal (SIGCHLD, SIG_DFL);
+#endif
+
+  /* From now on untrapsig has saved dispositions to restore.  */
+  sigs_trapped = 1;
+}
+
+/* Untrap signal S, or all trapped signals if S is zero.
+   Does nothing unless trapsigs has run.  Signals whose saved
+   disposition was SIG_IGN are skipped.  */
+static void
+untrapsig (s)
+     int s;
+{
+  int i;
+
+  if (sigs_trapped)
+    for (i = 0;  i < NUM_SIGS;  i++)
+      if ((!s || sigs[i] == s)  &&  initial_handler (i) != SIG_IGN)
+	/* Restore the disposition saved by trapsigs.  */
+#if HAVE_SIGACTION
+	  sigaction (sigs[i], &initial_action[i], 0);
+#else
+	  signal (sigs[i], initial_action[i]);
+#endif
+}
+
+/* Act on a signal recorded by catchsig, if any: clean up, restore
+   that signal's original disposition and send the signal to this
+   process again, so that the exit status shows a signal was
+   received.  Returns normally when no signal is pending.  */
+static void
+checksigs ()
+{
+  int pending = signal_received;
+
+  if (pending == 0)
+    return;
+
+  cleanup ();
+
+  /* Re-raise the signal with its original disposition in place.  */
+  untrapsig (pending);
+  kill (getpid (), pending);
+
+  /* Still running, so exit with error status instead.  */
+  exit (2);
+}
+
+
+
+/* Print the summary of interactive commands on stderr.  */
+static void
+give_help ()
+{
+  static char const *const help_lines[] =
+    {
+      "l:\tuse the left version\n",
+      "r:\tuse the right version\n",
+      "e l:\tedit then use the left version\n",
+      "e r:\tedit then use the right version\n",
+      "e b:\tedit then use the left and right versions concatenated\n",
+      "e:\tedit a new version\n",
+      "s:\tsilently include common lines\n",
+      "v:\tverbosely include common lines\n",
+      "q:\tquit\n"
+    };
+  size_t i;
+
+  for (i = 0; i < sizeof (help_lines) / sizeof (*help_lines); i++)
+    fputs (help_lines[i], stderr);
+}
+
+/* Read characters from stdin, skipping white space other than
+   newline, and return the first character that is not skipped.
+   Pending signals are checked after each skipped character; a read
+   error on stdin is fatal.  */
+static int
+skip_white ()
+{
+  int c;
+
+  for (c = getchar (); ISSPACE (c) && c != '\n'; c = getchar ())
+    checksigs ();
+
+  if (ferror (stdin))
+    perror_fatal ("input error");
+  return c;
+}
+
+/* Discard the rest of the current line on stdin, up to and including
+   the newline or until EOF.  A read error on stdin is fatal.  */
+static void
+flush_line ()
+{
+  int c;
+
+  do
+    c = getchar ();
+  while (c != '\n' && c != EOF);
+
+  if (ferror (stdin))
+    perror_fatal ("input error");
+}
+
+
+/* interpret an edit command
+
+   Prompt with `%' on stdout and read commands from stdin until one
+   resolves the current change, in which LEFT contributes LENL lines
+   and RIGHT contributes LENR lines:
+     l, r      copy that side to OUTFILE and skip the other;
+     e [l|r|b] put the chosen side(s) in a temporary file, run the
+               editor on it, and copy the result to OUTFILE;
+     s, v      set or clear suppress_common_flag, then prompt again;
+     q or EOF  give up.
+   Return 1 once the change has been resolved, 0 on quit.  */
+static int
+edit (left, lenl, right, lenr, outfile)
+     struct line_filter *left;
+     int lenl;
+     struct line_filter *right;
+     int lenr;
+     FILE *outfile;
+{
+  for (;;)
+    {
+      int cmd0, cmd1;
+      int gotcmd = 0;
+
+      cmd1 = 0; /* Pacify `gcc -W'.  */
+
+      /* Read until a well-formed command has been seen; `continue'
+	 below re-prompts.  */
+      while (!gotcmd)
+	{
+	  if (putchar ('%') != '%')
+	    perror_fatal ("output error");
+	  ck_fflush (stdout);
+
+	  cmd0 = skip_white ();
+	  switch (cmd0)
+	    {
+	    case 'l': case 'r': case 's': case 'v': case 'q':
+	      /* These commands take no argument.  */
+	      if (skip_white () != '\n')
+		{
+		  give_help ();
+		  flush_line ();
+		  continue;
+		}
+	      gotcmd = 1;
+	      break;
+
+	    case 'e':
+	      cmd1 = skip_white ();
+	      switch (cmd1)
+		{
+		case 'l': case 'r': case 'b':
+		  if (skip_white () != '\n')
+		    {
+		      give_help ();
+		      flush_line ();
+		      continue;
+		    }
+		  gotcmd = 1;
+		  break;
+		case '\n':
+		  gotcmd = 1;
+		  break;
+		default:
+		  give_help ();
+		  flush_line ();
+		  continue;
+		}
+	      break;
+	    case EOF:
+	      /* End of input is treated as `q'.  */
+	      if (feof (stdin))
+		{
+		  gotcmd = 1;
+		  cmd0 = 'q';
+		  break;
+		}
+	      /* falls through */
+	    default:
+	      flush_line ();
+	      /* falls through */
+	    case '\n':
+	      give_help ();
+	      continue;
+	    }
+	}
+
+      switch (cmd0)
+	{
+	case 'l':
+	  lf_copy (left, lenl, outfile);
+	  lf_skip (right, lenr);
+	  return 1;
+	case 'r':
+	  lf_copy (right, lenr, outfile);
+	  lf_skip (left, lenl);
+	  return 1;
+	case 's':
+	  suppress_common_flag = 1;
+	  break;
+	case 'v':
+	  suppress_common_flag = 0;
+	  break;
+	case 'q':
+	  return 0;
+	case 'e':
+	  {
+	    int tfd;
+	    FILE *tmp;
+	    char const *tmpdir;
+
+	    /* Drop the temporary file left by a previous edit.  */
+	    if (tmpmade)
+	      {
+	        unlink (tmpname);
+	        tmpmade = 0;
+		free (tmpname);
+	      }
+
+	    tmpdir = getenv ("TMPDIR");
+	    if (tmpdir == NULL)
+	      tmpdir = P_tmpdir;
+
+	    /* Check asprintf's result: on failure the pointer it was
+	       given is not guaranteed to be null on every libc.  */
+	    if (asprintf (&tmpname, "%s/sdiff.XXXXXX", tmpdir) < 0)
+	      {
+		tmpname = NULL;
+		perror_fatal ("temporary file name");
+	      }
+	    tfd = mkstemp (tmpname);
+	    if (tfd == -1)
+	      perror_fatal ("temporary file name");
+
+	    /* The file exists from here on; record that before anything
+	       else can fail so that cleanup removes it.  */
+	    tmpmade = 1;
+
+	    tmp = fdopen (tfd, "w+");
+	    if (tmp == NULL)
+	      perror_fatal ("temporary file name");
+
+	    /* Both inputs advance past the change whether or not their
+	       lines go into the file being edited.  */
+	    if (cmd1 == 'l' || cmd1 == 'b')
+	      lf_copy (left, lenl, tmp);
+	    else
+	      lf_skip (left, lenl);
+
+	    if (cmd1 == 'r' || cmd1 == 'b')
+	      lf_copy (right, lenr, tmp);
+	    else
+	      lf_skip (right, lenr);
+
+	    ck_fflush (tmp);
+
+	    {
+	      int wstatus;
+#if ! HAVE_FORK
+	      char *command = xmalloc (strlen (edbin) + strlen (tmpname) + 2);
+	      sprintf (command, "%s %s", edbin, tmpname);
+	      wstatus = system (command);
+	      free (command);
+#else /* HAVE_FORK */
+	      pid_t pid;
+
+	      /* The user may interrupt the editor; that SIGINT must not
+		 be recorded against sdiff.  */
+	      ignore_SIGINT = 1;
+	      checksigs ();
+
+	      pid = fork ();
+	      if (pid == 0)
+		{
+		  char const *argv[3];
+		  int i = 0;
+
+		  argv[i++] = edbin;
+		  argv[i++] = tmpname;
+		  argv[i++] = 0;
+
+		  execvp (edbin, (char **) argv);
+		  write (STDERR_FILENO, edbin, strlen (edbin));
+		  write (STDERR_FILENO, ": not found\n", 12);
+		  _exit (1);
+		}
+
+	      if (pid < 0)
+		perror_fatal ("fork failed");
+
+	      /* Retry waitpid when it is interrupted by a signal.  */
+	      while (waitpid (pid, &wstatus, 0) < 0)
+		if (errno == EINTR)
+		  checksigs ();
+		else
+		  perror_fatal ("wait failed");
+
+	      ignore_SIGINT = 0;
+#endif /* HAVE_FORK */
+
+	      if (wstatus != 0)
+		fatal ("Subsidiary editor failed");
+	    }
+
+	    /* Read the file back from its start and append it to the
+	       merge output.  */
+	    if (fseek (tmp, 0L, SEEK_SET) != 0)
+	      perror_fatal ("fseek");
+	    {
+	      /* SDIFF_BUFSIZE is too big for a local var
+		 in some compilers, so we allocate it dynamically.  */
+	      char *buf = xmalloc (SDIFF_BUFSIZE);
+	      size_t size;
+
+	      while ((size = ck_fread (buf, SDIFF_BUFSIZE, tmp)) != 0)
+		{
+		  checksigs ();
+		  ck_fwrite (buf, size, outfile);
+		}
+	      ck_fclose (tmp);
+
+	      free (buf);
+	    }
+	    return 1;
+	  }
+	default:
+	  give_help ();
+	  break;
+	}
+    }
+}
+
+
+
+/* Drive the merge from the subsidiary diff's output.
+   Each control line read from DIFF is dispatched on its first
+   character:
+     ' '  print the rest of the line on stdout;
+     'i'  identical lines, "iL,R": show or skip the diff text depending
+          on suppress_common_flag, copy L lines of LEFT to OUTFILE and
+          skip R lines of RIGHT;
+     'c'  changed lines, "cL,R": show the diff text and let the user
+          resolve the change with edit.
+   Any other line is a fatal error.  Returns lf_snarf's result when it
+   is zero or negative, and 0 when edit reports that the user quit.  */
+static int
+interact (diff, left, right, outfile)
+     struct line_filter *diff;
+     struct line_filter *left;
+     struct line_filter *right;
+     FILE *outfile;
+{
+  for (;;)
+    {
+      char diff_help[256];
+      int status = lf_snarf (diff, diff_help, sizeof (diff_help));
+      char kind;
+
+      if (status <= 0)
+        return status;
+
+      checksigs ();
+
+      kind = diff_help[0];
+      if (kind == ' ')
+        puts (diff_help + 1);
+      else if (kind == 'i' || kind == 'c')
+        {
+          /* Both forms carry "LEFTCOUNT,RIGHTCOUNT" after the letter.  */
+          int lenl = atoi (diff_help + 1);
+          char *comma = strchr (diff_help, ',');
+          int lenr, lenmax;
+
+          if (comma == 0)
+            fatal (diff_help);
+          lenr = atoi (comma + 1);
+          lenmax = max (lenl, lenr);
+
+          if (kind == 'c')
+            {
+              lf_copy (diff, lenmax, stdout);
+              if (! edit (left, lenl, right, lenr, outfile))
+                return 0;
+            }
+          else
+            {
+              if (suppress_common_flag)
+                lf_skip (diff, lenmax);
+              else
+                lf_copy (diff, lenmax, stdout);
+
+              lf_copy (left, lenl, outfile);
+              lf_skip (right, lenr);
+            }
+        }
+      else
+        fatal (diff_help);
+    }
+}
+
+
+
+/* temporary lossage: this is torn from gnu libc */
+/* Return 1 if stat succeeds on DIR and reports a directory, 0 if stat
+   fails or DIR is something else.  */
+static int
+diraccess (dir)
+     char const *dir;
+{
+  struct stat info;
+
+  if (stat (dir, &info) != 0)
+    return 0;
+
+  return S_ISDIR (info.st_mode) != 0;
+}

+ 284 - 0
sys/src/ape/cmd/diff/side.c

@@ -0,0 +1,284 @@
+/* sdiff-format output routines for GNU DIFF.
+   Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY.  No author or distributor
+accepts responsibility to anyone for the consequences of using it
+or for whether it serves any particular purpose or works at all,
+unless he says so in writing.  Refer to the GNU DIFF General Public
+License for full details.
+
+Everyone is granted permission to copy, modify and redistribute
+GNU DIFF, but only under the conditions described in the
+GNU DIFF General Public License.   A copy of this license is
+supposed to have been given to you along with GNU DIFF so you
+can know your rights and responsibilities.  It should be in a
+file named COPYING.  Among other things, the copyright notice
+and this notice must be preserved on all copies.  */
+
+
+#include "diff.h"
+
+static unsigned print_half_line PARAMS((char const * const *, unsigned, unsigned));
+static unsigned tab_from_to PARAMS((unsigned, unsigned));
+static void print_1sdiff_line PARAMS((char const * const *, int, char const * const *));
+static void print_sdiff_common_lines PARAMS((int, int));
+static void print_sdiff_hunk PARAMS((struct change *));
+
+/* Number of the next line still to be printed, in file 0 and in
+   file 1 respectively.  */
+static int next0;
+static int next1;
+
+/* Print the edit script SCRIPT in sdiff (side-by-side) style: each
+   hunk goes through print_sdiff_hunk, and the common lines left over
+   after the last hunk are printed at the end.  */
+
+void
+print_sdiff_script (script)
+     struct change *script;
+{
+  int start;
+
+  begin_output ();
+
+  /* Both cursors start at the negated count of prefix lines.  */
+  start = - files[0].prefix_lines;
+  next1 = start;
+  next0 = start;
+
+  print_script (script, find_change, print_sdiff_hunk);
+
+  print_sdiff_common_lines (files[0].valid_lines, files[1].valid_lines);
+}
+
+/* Advance the output from column FROM to column TO (FROM <= TO) and
+   yield TO.  Unless TAB_EXPAND_FLAG is set, tab characters are used
+   for every tab stop that is not past TO; spaces fill the rest.  */
+
+static unsigned
+tab_from_to (from, to)
+     unsigned from, to;
+{
+  FILE *out = outfile;
+
+  if (! tab_expand_flag)
+    {
+      /* STOP is always the first tab stop to the right of FROM.  */
+      unsigned stop = from + TAB_WIDTH - from % TAB_WIDTH;
+
+      while (stop <= to)
+	{
+	  putc ('\t', out);
+	  from = stop;
+	  stop += TAB_WIDTH;
+	}
+    }
+
+  /* Cover whatever distance is left one space at a time.  */
+  for (;  from < to;  from++)
+    putc (' ', out);
+
+  return to;
+}
+
+/*
+ * Print the text for half an sdiff line.  This means truncate to width
+ * observing tabs, and trim a trailing newline.  Returns the last column
+ * written (not the number of chars).
+ *
+ * LINE[0] points at the start of the text and LINE[1] at its limit.
+ * INDENT is only used after a carriage return, to tab back out from
+ * column 0.  A character is emitted only while the input column is
+ * below OUT_BOUND.  IN_POSITION is the column the input text has
+ * reached; OUT_POSITION is the column reached by what was actually
+ * emitted, so the two differ once text is being suppressed.
+ */
+static unsigned
+print_half_line (line, indent, out_bound)
+     char const * const *line;
+     unsigned indent, out_bound;
+{
+  FILE *out = outfile;
+  register unsigned in_position = 0, out_position = 0;
+  register char const
+	*text_pointer = line[0],
+	*text_limit = line[1];
+
+  while (text_pointer < text_limit)
+    {
+      register unsigned char c = *text_pointer++;
+
+      switch (c)
+	{
+	case '\t':
+	  {
+	    unsigned spaces = TAB_WIDTH - in_position % TAB_WIDTH;
+	    if (in_position == out_position)
+	      {
+		unsigned tabstop = out_position + spaces;
+		if (tab_expand_flag)
+		  {
+		    if (out_bound < tabstop)
+		      tabstop = out_bound;
+		    for (;  out_position < tabstop;  out_position++)
+		      putc (' ', out);
+		  }
+		else
+		  if (tabstop < out_bound)
+		    {
+		      out_position = tabstop;
+		      putc (c, out);
+		    }
+	      }
+	    in_position += spaces; /* counted even when nothing was emitted */
+	  }
+	  break;
+
+	case '\r':
+	  {
+	    putc (c, out);
+	    tab_from_to (0, indent); /* move back out to column INDENT */
+	    in_position = out_position = 0;
+	  }
+	  break;
+
+	case '\b':
+	  if (in_position != 0 && --in_position < out_bound)
+	    if (out_position <= in_position)
+	      /* Add spaces to make up for suppressed tab past out_bound.  */
+	      for (;  out_position < in_position;  out_position++)
+		putc (' ', out);
+	    else
+	      {
+		out_position = in_position;
+		putc (c, out);
+	      }
+	  break;
+
+	case '\f':
+	case '\v':
+	control_char: /* emitted without advancing either column */
+	  if (in_position < out_bound)
+	    putc (c, out);
+	  break;
+
+	default:
+	  if (! ISPRINT (c))
+	    goto control_char;
+	  /* falls through */
+	case ' ':
+	  if (in_position++ < out_bound)
+	    {
+	      out_position = in_position;
+	      putc (c, out);
+	    }
+	  break;
+
+	case '\n':
+	  return out_position;
+	}
+    }
+
+  return out_position;
+}
+
+/*
+ * Print one side-by-side output line: LEFT, the separator SEP in the
+ * gutter, then RIGHT.  A null LEFT or RIGHT stands for blank text on
+ * that side.  When SEP is a space no gutter is printed at all, and a
+ * right-hand line consisting of just a newline is not printed either,
+ * so easily recognised blank lines come out as a single newline.
+ * A `|' separator turns into `/' or `\' when only one of the two
+ * lines ends in a newline.
+ */
+
+static void
+print_1sdiff_line (left, sep, right)
+     char const * const *left;
+     int sep;
+     char const * const *right;
+{
+  FILE *out = outfile;
+  unsigned half_width = sdiff_half_width;
+  unsigned right_start = sdiff_column2_offset;
+  unsigned column = 0;
+  int need_newline = 0;
+
+  if (left != 0)
+    {
+      need_newline = (left[1][-1] == '\n');
+      column = print_half_line (left, 0, half_width);
+    }
+
+  if (sep != ' ')
+    {
+      unsigned gutter = (half_width + right_start - 1) / 2;
+
+      column = tab_from_to (column, gutter) + 1;
+      if (sep == '|')
+	{
+	  int right_newline = (right[1][-1] == '\n');
+
+	  if (need_newline != right_newline)
+	    sep = need_newline ? '/' : '\\';
+	}
+      putc (sep, out);
+    }
+
+  if (right != 0)
+    {
+      if (right[1][-1] == '\n')
+	need_newline = 1;
+      if (**right != '\n')
+	print_half_line (right, tab_from_to (column, right_start), half_width);
+    }
+
+  if (need_newline)
+    putc ('\n', out);
+}
+
+/* Print, in side-by-side format, the lines from NEXT0 up to LIMIT0 of
+   file 0 and from NEXT1 up to LIMIT1 of file 1, then advance NEXT0 and
+   NEXT1 to the limits.  Nothing is printed when
+   SDIFF_SKIP_COMMON_LINES is set.  */
+static void
+print_sdiff_common_lines (limit0, limit1)
+     int limit0, limit1;
+{
+  int i0 = next0;
+  int i1 = next1;
+  int pending = (i0 != limit0 || i1 != limit1);
+
+  if (pending && ! sdiff_skip_common_lines)
+    {
+      if (sdiff_help_sdiff)
+	fprintf (outfile, "i%d,%d\n", limit0 - i0, limit1 - i1);
+
+      if (! sdiff_left_only)
+	{
+	  /* Pair the lines up for as long as both sides have some.  */
+	  for (;  i0 != limit0 && i1 != limit1;  i0++, i1++)
+	    print_1sdiff_line (&files[0].linbuf[i0], ' ', &files[1].linbuf[i1]);
+
+	  for (;  i1 != limit1;  i1++)
+	    print_1sdiff_line (0, ')', &files[1].linbuf[i1]);
+	}
+
+      for (;  i0 != limit0;  i0++)
+	print_1sdiff_line (&files[0].linbuf[i0], '(', 0);
+    }
+
+  next0 = limit0;
+  next1 = limit1;
+}
+
+/* Print a hunk of an sdiff diff.
+   This is a contiguous portion of a complete edit script,
+   describing changes in consecutive lines.
+   The common lines before the hunk are printed first; then changed
+   lines are paired with `|', and whatever is left over on one side
+   is printed with `>' (file 1 only) or `<' (file 0 only).  */
+
+static void
+print_sdiff_hunk (hunk)
+     struct change *hunk;
+{
+  int first0, last0, first1, last1, deletes, inserts;
+
+  /* Determine range of line numbers involved in each file.  */
+  analyze_hunk (hunk, &first0, &last0, &first1, &last1, &deletes, &inserts);
+  if (!deletes && !inserts)
+    return;
+
+  /* Print out lines up to this change.  */
+  print_sdiff_common_lines (first0, first1);
+
+  if (sdiff_help_sdiff)
+    fprintf (outfile, "c%d,%d\n", last0 - first0 + 1, last1 - first1 + 1);
+
+  /* Print ``xxx  |  xxx '' lines */
+  if (inserts && deletes)
+    {
+      while (first0 <= last0 && first1 <= last1)
+	{
+	  print_1sdiff_line (&files[0].linbuf[first0], '|',
+			     &files[1].linbuf[first1]);
+	  first0++;
+	  first1++;
+	}
+
+      /* From here on only the longer side has lines left.  */
+      deletes = (first0 <= last0);
+      inserts = (first1 <= last1);
+      next0 = first0;
+      next1 = first1;
+    }
+
+
+  /* Print ``     >  xxx '' lines */
+  if (inserts)
+    {
+      while (first1 <= last1)
+	{
+	  print_1sdiff_line (0, '>', &files[1].linbuf[first1]);
+	  first1++;
+	}
+      next1 = first1;
+    }
+
+  /* Print ``xxx  <     '' lines */
+  if (deletes)
+    {
+      while (first0 <= last0)
+	{
+	  print_1sdiff_line (&files[0].linbuf[first0], '<', 0);
+	  first0++;
+	}
+      next0 = first0;
+    }
+}

+ 273 - 0
sys/src/ape/cmd/diff/system.h

@@ -0,0 +1,273 @@
+/* System dependent declarations.
+   Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU DIFF; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/* We must define `volatile' and `const' first (the latter inside config.h),
+   so that they're used consistently in all system includes.
+   When __STDC__ is not set, `volatile' is defined away to nothing
+   unless it is already a macro.  */
+#if !__STDC__
+#ifndef volatile
+#define volatile
+#endif
+#endif
+#include <config.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#if __STDC__ /* PARAMS (ARGS) yields ARGS here and () otherwise; VOID is void here and char otherwise.  */
+#define PARAMS(args) args
+#define VOID void
+#else
+#define PARAMS(args) ()
+#define VOID char
+#endif
+
+/* File type tests.  If config.h says the system's S_IS* macros are
+   broken, discard them; then supply any that are missing, each built
+   from S_IFMT and the matching S_IF* file type constant.  */
+#if STAT_MACROS_BROKEN
+#undef S_ISBLK
+#undef S_ISCHR
+#undef S_ISDIR
+#undef S_ISFIFO
+#undef S_ISREG
+#undef S_ISSOCK
+#endif
+#ifndef S_ISDIR
+#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
+#endif
+#ifndef S_ISREG
+#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG)
+#endif
+#if !defined(S_ISBLK) && defined(S_IFBLK)
+#define S_ISBLK(mode) (((mode) & S_IFMT) == S_IFBLK)
+#endif
+#if !defined(S_ISCHR) && defined(S_IFCHR)
+#define S_ISCHR(mode) (((mode) & S_IFMT) == S_IFCHR)
+#endif
+/* The FIFO file type constant is spelled S_IFIFO.  The fallback used
+   to test for `S_IFFIFO', which no system defines, so it could never
+   take effect.  */
+#if !defined(S_ISFIFO) && defined(S_IFIFO)
+#define S_ISFIFO(mode) (((mode) & S_IFMT) == S_IFIFO)
+#endif
+#if !defined(S_ISSOCK) && defined(S_IFSOCK)
+#define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK)
+#endif
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifndef SEEK_SET /* Fallbacks for the seek origins and the standard
+		    descriptor numbers, used only when nothing included
+		    so far has defined them.  */
+#define SEEK_SET 0
+#endif
+#ifndef SEEK_CUR
+#define SEEK_CUR 1
+#endif
+
+#ifndef STDIN_FILENO
+#define STDIN_FILENO 0
+#endif
+#ifndef STDOUT_FILENO
+#define STDOUT_FILENO 1
+#endif
+#ifndef STDERR_FILENO
+#define STDERR_FILENO 2
+#endif
+
+#if HAVE_TIME_H
+#include <time.h>
+#else
+#include <sys/time.h>
+#endif
+
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#else
+#if HAVE_SYS_FILE_H
+#include <sys/file.h>
+#endif
+#endif
+
+#if !HAVE_DUP2 /* Emulate dup2 (F, T): close T, then duplicate F with fcntl's F_DUPFD.  */
+#define dup2(f,t)	(close (t),  fcntl (f,F_DUPFD,t))
+#endif
+
+#ifndef O_RDONLY
+#define O_RDONLY 0
+#endif
+
+#if HAVE_SYS_WAIT_H
+#include <sys/wait.h>
+#endif
+#ifndef WEXITSTATUS /* Fallbacks: the exit code is taken from bits 8 and
+		       up of the wait status, and a status whose low 8
+		       bits are all zero counts as a normal exit.  */
+#define WEXITSTATUS(stat_val) ((unsigned) (stat_val) >> 8)
+#endif
+#ifndef WIFEXITED
+#define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
+#endif
+
+#ifndef STAT_BLOCKSIZE /* STAT_BLOCKSIZE (S) is S's st_blksize member when
+			  HAVE_ST_BLKSIZE is set, else a fixed 8 * 1024.  */
+#if HAVE_ST_BLKSIZE
+#define STAT_BLOCKSIZE(s) (s).st_blksize
+#else
+#define STAT_BLOCKSIZE(s) (8 * 1024)
+#endif
+#endif
+
+#if HAVE_DIRENT_H /* NAMLEN (D) yields the length of directory entry D's
+		     name.  Without <dirent.h>, `dirent' is mapped to
+		     `direct' and the entry's d_namlen member is used.  */
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) ((dirent)->d_namlen)
+# if HAVE_SYS_NDIR_H
+#  include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+#  include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+#  include <ndir.h>
+# endif
+#endif
+
+#if HAVE_VFORK_H
+#include <vfork.h>
+#endif
+
+#if HAVE_STDLIB_H /* Without <stdlib.h>, malloc and realloc are declared
+		     by hand as old-style functions returning VOID *.  */
+#include <stdlib.h>
+#else
+VOID *malloc ();
+VOID *realloc ();
+#endif
+#ifndef getenv
+char *getenv ();
+#endif
+
+#if HAVE_LIMITS_H
+#include <limits.h>
+#endif
+#ifndef INT_MAX
+#define INT_MAX 2147483647
+#endif
+#ifndef CHAR_BIT
+#define CHAR_BIT 8
+#endif
+
+#if STDC_HEADERS || HAVE_STRING_H /* Provide the str* and mem* names;
+				     where <string.h> is unavailable they
+				     are mapped onto index, rindex, bcmp
+				     and bcopy as configured below.  */
+# include <string.h>
+# ifndef bzero
+#  define bzero(s, n) memset (s, 0, n)
+# endif
+#else
+# if !HAVE_STRCHR
+#  define strchr index
+#  define strrchr rindex
+# endif
+char *strchr (), *strrchr ();
+# if !HAVE_MEMCHR
+#  define memcmp(s1, s2, n) bcmp (s1, s2, n)
+#  define memcpy(d, s, n) bcopy (s, d, n)
+void *memchr ();
+# endif
+#endif
+
+#include <ctype.h>
+/* CTYPE_DOMAIN (C) is nonzero if the unsigned char C can safely be given
+   as an argument to <ctype.h> macros like `isspace'.
+   ISPRINT, ISSPACE and ISUPPER apply that test before calling the
+   corresponding <ctype.h> macro.  ISDIGIT does not use <ctype.h>: it
+   checks arithmetically that C lies in '0'..'9'.  */
+#if STDC_HEADERS
+#define CTYPE_DOMAIN(c) 1
+#else
+#define CTYPE_DOMAIN(c) ((unsigned) (c) <= 0177)
+#endif
+#ifndef ISPRINT
+#define ISPRINT(c) (CTYPE_DOMAIN (c) && isprint (c))
+#endif
+#ifndef ISSPACE
+#define ISSPACE(c) (CTYPE_DOMAIN (c) && isspace (c))
+#endif
+#ifndef ISUPPER
+#define ISUPPER(c) (CTYPE_DOMAIN (c) && isupper (c))
+#endif
+
+#ifndef ISDIGIT
+#define ISDIGIT(c) ((unsigned) (c) - '0' <= 9)
+#endif
+
+#include <errno.h>
+#if !STDC_HEADERS
+extern int errno;
+#endif
+
+#ifdef min /* Any earlier min or max is replaced by the macros below,
+	      which expand one of their arguments twice.  */
+#undef min
+#endif
+#ifdef max
+#undef max
+#endif
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+/* This section contains Posix-compliant defaults for macros
+   that are meant to be overridden by hand in config.h as needed.
+   Each default below takes effect only if the macro is still
+   undefined at this point.  */
+
+#ifndef filename_cmp
+#define filename_cmp(a, b) strcmp (a, b)
+#endif
+
+#ifndef filename_lastdirchar
+#define filename_lastdirchar(filename) strrchr (filename, '/')
+#endif
+
+#ifndef HAVE_FORK
+#define HAVE_FORK 1
+#endif
+
+#ifndef HAVE_SETMODE
+#define HAVE_SETMODE 0
+#endif
+
+#ifndef initialize_main
+#define initialize_main(argcp, argvp)
+#endif
+
+/* Do struct stat *S, *T describe the same file?  Answer -1 if unknown.
+   The default here always answers 0; the comparison of st_ino and
+   st_dev is kept only as a commented-out alternative.  */
+#ifndef same_file
+/* #define same_file(s,t) ((s)->st_ino==(t)->st_ino && (s)->st_dev==(t)->st_dev) */
+#define same_file(s,t) 0
+#endif
+
+/* Place into Q a quoted version of A suitable for `popen' or `system',
+   incrementing Q and junking A.
+   Do not increment Q by more than 4 * strlen (A) + 2.
+   The default wraps A in single quotes and writes each single quote
+   inside A as the four characters '\''.  No terminating null is
+   stored.  */
+#ifndef SYSTEM_QUOTE_ARG
+#define SYSTEM_QUOTE_ARG(q, a) \
+  { \
+    *(q)++ = '\''; \
+    for (;  *(a);  *(q)++ = *(a)++) \
+      if (*(a) == '\'') \
+	{ \
+	  *(q)++ = '\''; \
+	  *(q)++ = '\\'; \
+	  *(q)++ = '\''; \
+	} \
+    *(q)++ = '\''; \
+  }
+#endif
+#ifndef FOLD_FN_CHAR /* Default: file name characters and names are used unchanged and compared with strcmp.  */
+#define FOLD_FN_CHAR(c) (c)
+#define fnfold(filename) (filename)
+#define fncmp strcmp
+#endif

+ 759 - 0
sys/src/ape/cmd/diff/util.c

@@ -0,0 +1,759 @@
+/* Support routines for GNU DIFF.
+   Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of GNU DIFF.
+
+GNU DIFF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU DIFF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU DIFF; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/* $FreeBSD: src/contrib/diff/util.c,v 1.2.6.2 2000/09/20 02:24:32 jkh Exp $ */
+
+#include "diff.h"
+
+#ifndef PR_PROGRAM
+#define PR_PROGRAM "/bin/pr"
+#endif
+
+/* Queue up one-line messages to be printed at the end,
+   when -l is specified.  Each message is recorded with a `struct msg':
+   NEXT links to the message queued after it, FORMAT is the printf
+   format, and ARG1 through ARG4 are the string arguments for it.  */
+
+struct msg
+{
+  struct msg *next;
+  char const *format;
+  char const *arg1;
+  char const *arg2;
+  char const *arg3;
+  char const *arg4;
+};
+
+/* Head of the chain of queued messages.  */
+
+static struct msg *msg_chain;
+
+/* Tail of the chain of queued messages: the address of the link
+   through which the next message is to be attached.  */
+
+static struct msg **msg_chain_end = &msg_chain;
+
+/* Report a failed system call on stderr as "PROGRAM_NAME: TEXT: "
+   followed by the system's message for errno.
+   TEXT should normally be the file name.  */
+
+void
+perror_with_name (text)
+     char const *text;
+{
+  int saved_errno = errno;
+
+  fprintf (stderr, "%s: ", program_name);
+
+  /* Writing the prefix may have changed errno; put back the value
+     perror is meant to describe.  */
+  errno = saved_errno;
+  perror (text);
+}
+
+/* Fatal variant of perror_with_name: flush the queued messages,
+   report the failed system call named by TEXT, and exit with
+   status 2.  */
+
+void
+pfatal_with_name (text)
+     char const *text;
+{
+  int saved_errno = errno;
+
+  print_message_queue ();
+
+  /* Printing the queue may have changed errno.  */
+  errno = saved_errno;
+  perror_with_name (text);
+  exit (2);
+}
+
+/* Print on stderr the program name, then the message produced by the
+   format string FORMAT with the string arguments ARG and ARG1, then a
+   newline.  */
+
+void
+error (format, arg, arg1)
+     char const *format, *arg, *arg1;
+{
+  FILE *err = stderr;
+
+  fprintf (err, "%s: ", program_name);
+  fprintf (err, format, arg, arg1);
+  putc ('\n', err);
+}
+
+/* Print the queued messages and then an error message containing the
+   string M, then exit with status 2.  */
+
+void
+fatal (m)
+     char const *m;
+{
+  print_message_queue ();
+  error ("%s", m, 0);
+  exit (2);
+}
+
+/* Like printf, except if -l in effect then save the message and print later.
+   This is used for things like "binary files differ" and "Only in ...".
+   FORMAT may use up to two string arguments, ARG1 and ARG2.  */
+
+void
+message (format, arg1, arg2)
+     char const *format, *arg1, *arg2;
+{
+  message5 (format, arg1, arg2, 0, 0);
+}
+
+/* Like `message', but FORMAT may use up to four string arguments.
+   When PAGINATE_FLAG is set, a record holding FORMAT itself and
+   private copies of the non-null arguments is appended to the message
+   queue.  Otherwise the message is printed at once, preceded by a
+   space when SDIFF_HELP_SDIFF is set.  Arguments that FORMAT does not
+   use may be passed as 0.  */
+
+void
+message5 (format, arg1, arg2, arg3, arg4)
+     char const *format, *arg1, *arg2, *arg3, *arg4;
+{
+  if (paginate_flag)
+    {
+      struct msg *new = (struct msg *) xmalloc (sizeof (struct msg));
+      new->format = format;
+      /* concat applies strlen to each of its arguments, so it must never
+	 be handed a null pointer.  ARG1 and ARG2 are guarded in the same
+	 way as ARG3 and ARG4.  */
+      new->arg1 = arg1 ? concat (arg1, "", "") : 0;
+      new->arg2 = arg2 ? concat (arg2, "", "") : 0;
+      new->arg3 = arg3 ? concat (arg3, "", "") : 0;
+      new->arg4 = arg4 ? concat (arg4, "", "") : 0;
+      new->next = 0;
+      /* Attach at the tail, then make the new link the tail.  */
+      *msg_chain_end = new;
+      msg_chain_end = &new->next;
+    }
+  else
+    {
+      if (sdiff_help_sdiff)
+	putchar (' ');
+      printf (format, arg1, arg2, arg3, arg4);
+    }
+}
+
+/* Print, oldest first, every message that `message' or `message5'
+   queued.  The queue itself is left as it is.  */
+
+void
+print_message_queue ()
+{
+  struct msg const *m = msg_chain;
+
+  while (m != 0)
+    {
+      printf (m->format, m->arg1, m->arg2, m->arg3, m->arg4);
+      m = m->next;
+    }
+}
+
+/* Call before outputting the results of comparing files NAME0 and NAME1.
+   This only records NAME0, NAME1 and DEPTH and clears OUTFILE; the
+   stream is set up later by begin_output.
+
+   Usually, OUTFILE is just stdout.  But when -l was specified
+   a `pr' is forked off and OUTFILE becomes a pipe to it.
+   `pr' then outputs to our stdout.  */
+
+static char const *current_name0;
+static char const *current_name1;
+static int current_depth;
+
+void
+setup_output (name0, name1, depth)
+     char const *name0, *name1;
+     int depth;
+{
+  /* No output stream is open for this comparison yet.  */
+  outfile = 0;
+
+  current_depth = depth;
+  current_name0 = name0;
+  current_name1 = name1;
+}
+
+#if HAVE_FORK
+/* Process id of the subsidiary `pr' started by begin_output.  */
+static pid_t pr_pid;
+#endif
+
+/* Set up OUTFILE for the comparison recorded by setup_output, unless
+   it is already set.  With PAGINATE_FLAG the output is piped into
+   PR_PROGRAM, which gets the "diff OPTIONS NAME0 NAME1" header as the
+   argument of its -h option.  Otherwise OUTFILE is stdout and the
+   header is printed only when CURRENT_DEPTH is positive.  Finally the
+   context or unified header is printed if OUTPUT_STYLE asks for it.
+   Failure to create the pipe, to fork, or to start `pr' is fatal.  */
+
+void
+begin_output ()
+{
+  char *name;
+
+  if (outfile != 0)
+    return;
+
+  /* Construct the header of this piece of diff.
+     The 7 extra bytes hold "diff", the two separating spaces and the
+     terminating null.  */
+  name = xmalloc (strlen (current_name0) + strlen (current_name1)
+		  + strlen (switch_string) + 7);
+  /* Posix.2 section 4.17.6.1.1 specifies this format.  But there is a
+     bug in the first printing (IEEE Std 1003.2-1992 p 251 l 3304):
+     it says that we must print only the last component of the pathnames.
+     This requirement is silly and does not match historical practice.  */
+  sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
+
+  if (paginate_flag)
+    {
+      /* Make OUTFILE a pipe to a subsidiary `pr'.  */
+
+#if HAVE_FORK
+      int pipes[2];
+
+      if (pipe (pipes) != 0)
+	pfatal_with_name ("pipe");
+
+      /* Do not let the child inherit unwritten output.  */
+      fflush (stdout);
+
+      pr_pid = fork ();
+      if (pr_pid < 0)
+	pfatal_with_name ("fork");
+
+      if (pr_pid == 0)
+	{
+	  /* Child: read the pipe on standard input and become `pr'.  */
+	  close (pipes[1]);
+	  if (pipes[0] != STDIN_FILENO)
+	    {
+	      if (dup2 (pipes[0], STDIN_FILENO) < 0)
+		pfatal_with_name ("dup2");
+	      close (pipes[0]);
+	    }
+	  /* The argument list must end with a null pointer of type
+	     char *; a plain 0 is an int and need not be passed the
+	     same way as a pointer.  */
+#ifdef __FreeBSD__
+	  execl (PR_PROGRAM, PR_PROGRAM, "-F", "-h", name, (char *) 0);
+#else
+	  execl (PR_PROGRAM, PR_PROGRAM, "-f", "-h", name, (char *) 0);
+#endif
+	  /* Only reached if execl failed.  */
+	  pfatal_with_name (PR_PROGRAM);
+	}
+      else
+	{
+	  /* Parent: write into the pipe.  */
+	  close (pipes[0]);
+	  outfile = fdopen (pipes[1], "w");
+	  if (!outfile)
+	    pfatal_with_name ("fdopen");
+	}
+#else /* ! HAVE_FORK */
+      char *command = xmalloc (4 * strlen (name) + strlen (PR_PROGRAM) + 10);
+      char *p;
+      char const *a = name;
+      sprintf (command, "%s -f -h ", PR_PROGRAM);
+      p = command + strlen (command);
+      SYSTEM_QUOTE_ARG (p, a);
+      *p = 0;
+      outfile = popen (command, "w");
+      if (!outfile)
+	pfatal_with_name (command);
+      free (command);
+#endif /* ! HAVE_FORK */
+    }
+  else
+    {
+
+      /* If -l was not specified, output the diff straight to `stdout'.  */
+
+      outfile = stdout;
+
+      /* If handling multiple files (because scanning a directory),
+	 print which files the following output is about.  */
+      if (current_depth > 0)
+	printf ("%s\n", name);
+    }
+
+  free (name);
+
+  /* A special header is needed at the beginning of context output.  */
+  switch (output_style)
+    {
+    case OUTPUT_CONTEXT:
+      print_context_header (files, 0);
+      break;
+
+    case OUTPUT_UNIFIED:
+      print_context_header (files, 1);
+      break;
+
+    default:
+      break;
+    }
+}
+
+/* Call after the end of output of diffs for one file.
+   If OUTFILE is a stream of its own (neither null nor stdout), close
+   it and collect the status of the subsidiary `pr'; a write error or
+   a nonzero status is fatal.  OUTFILE is null afterwards.  */
+
+void
+finish_output ()
+{
+  if (outfile && outfile != stdout)
+    {
+      int wstatus;
+
+      if (ferror (outfile))
+	fatal ("write error");
+
+#if HAVE_FORK
+      if (fclose (outfile) != 0)
+	pfatal_with_name ("write error");
+      if (waitpid (pr_pid, &wstatus, 0) < 0)
+	pfatal_with_name ("waitpid");
+#else /* ! HAVE_FORK */
+      wstatus = pclose (outfile);
+#endif /* ! HAVE_FORK */
+
+      if (wstatus != 0)
+	fatal ("subsidiary pr failed");
+    }
+
+  outfile = 0;
+}
+
+/* Compare two lines (typically one from each input file)
+   according to the command line options.
+   For efficiency, this is invoked only when the lines do not match exactly
+   but an option like -i might cause us to ignore the difference.
+   Return nonzero if the lines differ.
+
+   S1 and S2 point at the first characters of the two lines.  Both
+   lines are scanned in step; the result is 0 once a newline is reached
+   with every character so far judged equal, and 1 at the first pair of
+   characters that still differs after the adjustments for
+   IGNORE_ALL_SPACE_FLAG, IGNORE_SPACE_CHANGE_FLAG and IGNORE_CASE_FLAG.  */
+
+int
+line_cmp (s1, s2)
+     char const *s1, *s2;
+{
+  register unsigned char const *t1 = (unsigned char const *) s1;
+  register unsigned char const *t2 = (unsigned char const *) s2;
+
+  while (1)
+    {
+      register unsigned char c1 = *t1++;
+      register unsigned char c2 = *t2++;
+
+      /* Test for exact char equality first, since it's a common case.  */
+      if (c1 != c2)
+	{
+	  /* Ignore horizontal white space if -b or -w is specified.  */
+
+	  if (ignore_all_space_flag)
+	    {
+	      /* For -w, just skip past any white space.  */
+	      while (ISSPACE (c1) && c1 != '\n') c1 = *t1++;
+	      while (ISSPACE (c2) && c2 != '\n') c2 = *t2++;
+	    }
+	  else if (ignore_space_change_flag)
+	    {
+	      /* For -b, advance past any sequence of white space in line 1
+		 and consider it just one Space, or nothing at all
+		 if it is at the end of the line.  */
+	      if (ISSPACE (c1))
+		{
+		  while (c1 != '\n')
+		    {
+		      c1 = *t1++;
+		      if (! ISSPACE (c1))
+			{
+			  --t1;
+			  c1 = ' ';
+			  break;
+			}
+		    }
+		}
+
+	      /* Likewise for line 2.  */
+	      if (ISSPACE (c2))
+		{
+		  while (c2 != '\n')
+		    {
+		      c2 = *t2++;
+		      if (! ISSPACE (c2))
+			{
+			  --t2;
+			  c2 = ' ';
+			  break;
+			}
+		    }
+		}
+
+	      if (c1 != c2)
+		{
+		  /* If we went too far when doing the simple test
+		     for equality, go back to the first non-white-space
+		     character in both sides and try again.  */
+		  if (c2 == ' ' && c1 != '\n'
+		      && (unsigned char const *) s1 + 1 < t1
+		      && ISSPACE(t1[-2]))
+		    {
+		      --t1;
+		      continue;
+		    }
+		  if (c1 == ' ' && c2 != '\n'
+		      && (unsigned char const *) s2 + 1 < t2
+		      && ISSPACE(t2[-2]))
+		    {
+		      --t2;
+		      continue;
+		    }
+		}
+	    }
+
+	  /* Lowercase all letters if -i is specified.  */
+
+	  if (ignore_case_flag)
+	    {
+	      if (ISUPPER (c1))
+		c1 = tolower (c1);
+	      if (ISUPPER (c2))
+		c2 = tolower (c2);
+	    }
+
+	  if (c1 != c2)
+	    break;
+	}
+      if (c1 == '\n') /* here c1 equals c2, so both lines have ended */
+	return 0;
+    }
+
+  return (1);
+}
+
+/* Find the consecutive changes at the start of the script START.
+   Return the last link before the first gap.
+   As written here, find_change and find_reverse_change both return
+   START itself, so when either is used as print_script's HUNKFUN each
+   hunk consists of a single link.  */
+
+struct change *
+find_change (start)
+     struct change *start;
+{
+  return start;
+}
+
+struct change *
+find_reverse_change (start)
+     struct change *start;
+{
+  return start;
+}
+
+/* Print SCRIPT one hunk at a time.
+
+   HUNKFUN is given the not yet printed tail of the script and returns
+   the last link that belongs to the same hunk as the first link of
+   that tail.
+
+   PRINTFUN is given the hunk, temporarily cut off from the rest of
+   the script by a null link, and prints it.
+
+   SCRIPT is linked together again exactly as before by the time this
+   returns.  */
+
+void
+print_script (script, hunkfun, printfun)
+     struct change *script;
+     struct change * (*hunkfun) PARAMS((struct change *));
+     void (*printfun) PARAMS((struct change *));
+{
+  struct change *hunk;
+  struct change *rest;
+
+  for (hunk = script;  hunk;  hunk = rest)
+    {
+      /* Find where the changes that belong together end.  */
+      struct change *last = (*hunkfun) (hunk);
+
+      /* Cut the hunk loose, remembering what follows it.  */
+      rest = last->link;
+      last->link = 0;
+#ifdef DEBUG
+      debug_script (hunk);
+#endif
+
+      (*printfun) (hunk);
+
+      /* Splice the script together again so it can all be freed.  */
+      last->link = rest;
+    }
+}
+
+/* Print the single line LINE (LINE[0] is its start, LINE[1] its
+   limit), preceded by the flag characters in LINE_FLAG, which say
+   whether the line is inserted, deleted, changed, etc.
+   A newline is supplied if the text lacks one, except when LINE_FLAG
+   is an empty string.  */
+
+void
+print_1_line (line_flag, line)
+     char const *line_flag;
+     char const * const *line;
+{
+  char const *start = line[0];
+  char const *end = line[1];
+  FILE *out = outfile;
+  char const *flag_format = 0;
+  int has_flag = (line_flag != 0 && line_flag[0] != '\0');
+
+  /* If -T was specified, use a Tab between the line-flag and the text.
+     Otherwise use a Space (as Unix diff does).
+     Print neither space nor tab if line-flags are empty.  */
+
+  if (has_flag)
+    {
+      if (tab_align_flag)
+	flag_format = "%s\t";
+      else
+	flag_format = "%s ";
+      fprintf (out, flag_format, line_flag);
+    }
+
+  output_1_line (start, end, flag_format, line_flag);
+
+  if (line_flag == 0 || has_flag)
+    {
+      if (end[-1] != '\n')
+	fputc ('\n', out);
+    }
+}
+
+/* Write the text from TEXT up to LIMIT to OUTFILE.  Without -t the
+   bytes are written verbatim.  With -t each tab is replaced by spaces
+   up to the next tab stop, a backspace in column 0 is dropped, and,
+   if FLAG_FORMAT is nonzero, it is printed with argument LINE_FLAG
+   after every carriage return that is not directly followed by the
+   end of the text or a newline, so that tab stops continue to line
+   up.  */
+
+void
+output_1_line (text, limit, flag_format, line_flag)
+     char const *text, *limit, *flag_format, *line_flag;
+{
+  if (tab_expand_flag)
+    {
+      FILE *out = outfile;
+      char const *p = text;
+      unsigned column = 0;
+
+      while (p < limit)
+	{
+	  unsigned char c = *p++;
+
+	  if (c == '\t')
+	    {
+	      unsigned pad = TAB_WIDTH - column % TAB_WIDTH;
+
+	      column += pad;
+	      do
+		putc (' ', out);
+	      while (--pad);
+	    }
+	  else if (c == '\r')
+	    {
+	      putc (c, out);
+	      if (flag_format && p < limit && *p != '\n')
+		fprintf (out, flag_format, line_flag);
+	      column = 0;
+	    }
+	  else if (c == '\b')
+	    {
+	      /* There is nothing to back over in column 0.  */
+	      if (column != 0)
+		{
+		  column--;
+		  putc (c, out);
+		}
+	    }
+	  else
+	    {
+	      if (ISPRINT (c))
+		column++;
+	      putc (c, out);
+	    }
+	}
+    }
+  else
+    fwrite (text, sizeof (char), limit - text, outfile);
+}
+
+/* Yield the letter for a change: 'd' when INSERTS is zero, 'a' when
+   only DELETES is zero, and 'c' when neither is.  */
+int
+change_letter (inserts, deletes)
+     int inserts, deletes;
+{
+  if (inserts && deletes)
+    return 'c';
+
+  return inserts ? 'a' : 'd';
+}
+
+/* Translate an internal line number (an index into diff's table of lines)
+   into an actual line number in the input file.
+   The internal line number is LNUM.  FILE points to the data on the file.
+
+   Internal line numbers count from 0 starting after the prefix.
+   Actual line numbers count from 1 within the entire file.
+   The result is therefore LNUM plus FILE's prefix_lines plus 1.  */
+
+int
+translate_line_number (file, lnum)
+     struct file_data const *file;
+     int lnum;
+{
+  return lnum + file->prefix_lines + 1;
+}
+
+/* Translate the internal line numbers A and B of FILE and store the
+   results in *APTR and *BPTR.  A is translated by way of the line
+   before it and B by way of the line after it.  */
+void
+translate_range (file, a, b, aptr, bptr)
+     struct file_data const *file;
+     int a, b;
+     int *aptr, *bptr;
+{
+  int before_a = translate_line_number (file, a - 1);
+
+  *aptr = before_a + 1;
+
+  {
+    int after_b = translate_line_number (file, b + 1);
+
+    *bptr = after_b - 1;
+  }
+}
+
+/* Print to OUTFILE the range from A to B of FILE, as two numbers
+   separated by SEPCHAR, or as a single number when the range does not
+   span more than one line.
+
+   Args A and B are internal line numbers.
+   We print the translated (real) line numbers.  */
+
+void
+print_number_range (sepchar, file, a, b)
+     int sepchar;
+     struct file_data *file;
+     int a, b;
+{
+  int first, last;
+
+  translate_range (file, a, b, &first, &last);
+
+  /* Note: we can have B < A in the case of a range of no lines.
+     In this case, we should print the line number before the range,
+     which is B.  */
+  if (last <= first)
+    fprintf (outfile, "%d", last);
+  else
+    fprintf (outfile, "%d%c%d", first, sepchar, last);
+}
+
+/* Yield nonzero if the line that starts at LINEP[0] and ends where
+   LINEP[1] starts may be ignored: it is blank while
+   IGNORE_BLANK_LINES_FLAG is set, or a regexp on IGNORE_REGEXP_LIST
+   matches it.  */
+
+static int
+hunk_line_ignorable (linep)
+     char const * const *linep;
+{
+  char const *line = linep[0];
+  struct regexp_list *r;
+  int len;
+
+  if (ignore_blank_lines_flag && line[0] == '\n')
+    return 1;
+
+  len = linep[1] - line;
+  for (r = ignore_regexp_list; r; r = r->next)
+    if (0 <= re_search (&r->buf, line, len, 0, len, 0))
+      return 1;	/* Found a match.  Ignore this line.  */
+
+  /* Nothing on the regexp list matched, so the line is nontrivial.  */
+  return 0;
+}
+
+/* Look at a hunk of edit script and report the range of lines in each file
+   that it applies to.  HUNK is the start of the hunk, which is a chain
+   of `struct change'.  The first and last line numbers of file 0 are stored in
+   *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
+   Note that these are internal line numbers that count from 0.
+
+   If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
+
+   Also set *DELETES nonzero if any lines of file 0 are deleted
+   and set *INSERTS nonzero if any lines of file 1 are inserted.
+   If only ignorable lines are inserted or deleted, both are
+   set to 0.  */
+
+void
+analyze_hunk (hunk, first0, last0, first1, last1, deletes, inserts)
+     struct change *hunk;
+     int *first0, *last0, *first1, *last1;
+     int *deletes, *inserts;
+{
+  int end0, end1;
+  int ndeleted = 0, ninserted = 0;
+  int i;
+  /* The hunk can only turn out to be trivial if some kind of line is
+     being ignored at all.  */
+  int trivial = ignore_blank_lines_flag || ignore_regexp_list;
+  struct change *link = hunk;
+
+  *first0 = hunk->line0;
+  *first1 = hunk->line1;
+
+  do
+    {
+      end0 = link->line0 + link->deleted - 1;
+      end1 = link->line1 + link->inserted - 1;
+      ndeleted += link->deleted;
+      ninserted += link->inserted;
+
+      /* Scanning stops at the first line that cannot be ignored.  */
+      for (i = link->line0; i <= end0 && trivial; i++)
+	if (! hunk_line_ignorable (&files[0].linbuf[i]))
+	  trivial = 0;
+
+      for (i = link->line1; i <= end1 && trivial; i++)
+	if (! hunk_line_ignorable (&files[1].linbuf[i]))
+	  trivial = 0;
+
+      link = link->link;
+    }
+  while (link != 0);
+
+  *last0 = end0;
+  *last1 = end1;
+
+  /* If all inserted or deleted lines are ignorable,
+     tell the caller to ignore this hunk.  */
+
+  if (trivial)
+    {
+      ndeleted = 0;
+      ninserted = 0;
+    }
+
+  *deletes = ndeleted;
+  *inserts = ninserted;
+}
+
+/* Allocate SIZE bytes with malloc, asking for 1 byte when SIZE is 0.
+   Failure is reported through `fatal'.  */
+
+VOID *
+xmalloc (size)
+     size_t size;
+{
+  VOID *block = (VOID *) malloc (size ? size : 1);
+
+  if (block == 0)
+    fatal ("memory exhausted");
+
+  return block;
+}
+
+/* Resize the block OLD to SIZE bytes with realloc, asking for 1 byte
+   when SIZE is 0.  Failure is reported through `fatal'.  */
+
+VOID *
+xrealloc (old, size)
+     VOID *old;
+     size_t size;
+{
+  VOID *block = (VOID *) realloc (old, size ? size : 1);
+
+  if (block == 0)
+    fatal ("memory exhausted");
+
+  return block;
+}
+
+/* Yield a newly allocated string holding S1, S2 and S3 joined end to
+   end.  The space comes from xmalloc.  */
+
+char *
+concat (s1, s2, s3)
+     char const *s1, *s2, *s3;
+{
+  size_t total = strlen (s1);
+  char *joined;
+
+  total += strlen (s2);
+  total += strlen (s3);
+
+  /* One more byte for the terminating null.  */
+  joined = xmalloc (total + 1);
+  sprintf (joined, "%s%s%s", s1, s2, s3);
+  return joined;
+}
+
+/* Yield the newly malloc'd pathname
+   of the file in DIR whose filename is FILE.
+   A "/" is put between the two unless the last directory separator
+   in DIR is also its last character.  */
+
+char *
+dir_file_pathname (dir, file)
+     char const *dir, *file;
+{
+  char const *last = filename_lastdirchar (dir);
+  char const *separator = (last && !last[1]) ? "" : "/";
+
+  return concat (dir, separator, file);
+}
+
+/* Flush stdout, then list every link of the script SP on stderr: its
+   line0 and line1 followed by its deleted and inserted counts.  */
+void
+debug_script (sp)
+     struct change *sp;
+{
+  fflush (stdout);
+
+  while (sp != 0)
+    {
+      fprintf (stderr, "%3d %3d delete %d insert %d\n",
+	       sp->line0, sp->line1, sp->deleted, sp->inserted);
+      sp = sp->link;
+    }
+
+  fflush (stderr);
+}

+ 5 - 0
sys/src/ape/cmd/diff/version.c

@@ -0,0 +1,5 @@
+/* Version number of GNU diff.  */
+
+#include "config.h"
+
+char const version_string[] = "2.7";

+ 81 - 0
sys/src/ape/cmd/diff/xmalloc.c

@@ -0,0 +1,81 @@
+/* xmalloc.c -- malloc with out of memory checking
+   Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#if __STDC__
+#define VOID void
+#else
+#define VOID char
+#endif
+
+#include <sys/types.h>
+
+#if STDC_HEADERS
+#include <stdlib.h>
+#else
+VOID *malloc ();
+VOID *realloc ();
+void free ();
+#endif
+
+#if __STDC__ && defined (HAVE_VPRINTF)
+void error (int, int, char const *, ...);
+#else
+void error ();
+#endif
+
+/* Allocate N bytes of memory dynamically, with error checking.  */
+
+VOID *
+xmalloc (n)
+     size_t n;
+{
+  VOID *p;
+
+  p = malloc (n);
+  if (p == 0)
+    /* Must exit with 2 for `cmp'.  */
+    error (2, 0, "memory exhausted");
+  return p;
+}
+
+/* Change the size of an allocated block of memory P to N bytes,
+   with error checking.
+   If P is NULL, run xmalloc.
+   If N is 0, run free and return NULL.  */
+
+VOID *
+xrealloc (p, n)
+     VOID *p;
+     size_t n;
+{
+  if (p == 0)
+    return xmalloc (n);
+  if (n == 0)
+    {
+      free (p);
+      return 0;
+    }
+  p = realloc (p, n);
+  if (p == 0)
+    /* Must exit with 2 for `cmp'.  */
+    error (2, 0, "memory exhausted");
+  return p;
+}

+ 38 - 0
sys/src/ape/cmd/dirname.c

@@ -0,0 +1,38 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * dirname string
+ *
+ * Print the directory part of the single argument: trailing slashes
+ * are dropped, then the last component and the slashes in front of it
+ * are removed.  A name with no slash yields ".", and a name whose only
+ * remaining slash is the leading one yields "/".
+ * Exits 1 with a usage message unless exactly one argument is given.
+ */
+int
+main(int argc, char **argv)
+{
+	char *s, *slash;
+	size_t n;
+
+	if(argc != 2){
+		fprintf(stderr, "Usage: dirname string\n");
+		exit(1);
+	}
+	s = argv[1];
+
+	/* drop trailing slashes, but never the first character */
+	n = strlen(s);
+	while(n > 1 && s[n-1] == '/')
+		n--;
+	s[n] = 0;
+
+	slash = strrchr(s, '/');
+	if(slash == NULL) {
+		/* no directory part at all */
+		s = ".";
+	} else {
+		/* cut at the last slash, swallowing a run of slashes */
+		while(slash > s && *slash == '/')
+			slash--;
+		slash[1] = 0;
+	}
+
+	printf("%s\n", s);
+	return 0;
+}

+ 295 - 0
sys/src/ape/cmd/expr/expr.y

@@ -0,0 +1,295 @@
+/* Yacc productions for "expr" command: */
+
+%token OR AND ADD SUBT MULT DIV REM EQ GT GEQ LT LEQ NEQ
+%token A_STRING SUBSTR LENGTH INDEX NOARG MATCH
+
+/* operators listed below in increasing precedence: */
+%left OR
+%left AND
+%left EQ LT GT GEQ LEQ NEQ
+%left ADD SUBT
+%left MULT DIV REM
+%left MCH
+%left MATCH
+%left SUBSTR
+%left LENGTH INDEX
+
+%{
+#define YYSTYPE charp
+
+typedef char *charp;
+%}
+
+%%
+
+/* a single `expression' is evaluated and printed: */
+
+expression:	expr NOARG = {
+			prt(1, $1);
+			exit((!strcmp($1,"0")||!strcmp($1,"\0"))? 1: 0);
+			}
+	;
+
+
+expr:	'(' expr ')' = { $$ = $2; }
+	| expr OR expr   = { $$ = conj(OR, $1, $3); }
+	| expr AND expr   = { $$ = conj(AND, $1, $3); }
+	| expr EQ expr   = { $$ = rel(EQ, $1, $3); }
+	| expr GT expr   = { $$ = rel(GT, $1, $3); }
+	| expr GEQ expr   = { $$ = rel(GEQ, $1, $3); }
+	| expr LT expr   = { $$ = rel(LT, $1, $3); }
+	| expr LEQ expr   = { $$ = rel(LEQ, $1, $3); }
+	| expr NEQ expr   = { $$ = rel(NEQ, $1, $3); }
+	| expr ADD expr   = { $$ = arith(ADD, $1, $3); }
+	| expr SUBT expr   = { $$ = arith(SUBT, $1, $3); }
+	| expr MULT expr   = { $$ = arith(MULT, $1, $3); }
+	| expr DIV expr   = { $$ = arith(DIV, $1, $3); }
+	| expr REM expr   = { $$ = arith(REM, $1, $3); }
+	| expr MCH expr	 = { $$ = match($1, $3); }
+	| MATCH expr expr = { $$ = match($2, $3); }
+	| SUBSTR expr expr expr = { $$ = substr($2, $3, $4); }
+	| LENGTH expr       = { $$ = length($2); }
+	| INDEX expr expr = { $$ = index($2, $3); }
+	| A_STRING
+	;
+%%
+/*	expression command */
+#include <stdio.h>
+/* get rid of yacc debug printf's */
+#define printf
+#define ESIZE	512
+#define error(c)	errxx(c)
+#define EQL(x,y) !strcmp(x,y)
+long atol();
+char *ltoa();
+char	**Av;
+int	Ac;
+int	Argi;
+
+char Mstring[1][128];
+char *malloc();
+extern int nbra;
+int yyparse(void);
+
+/* Record the argument vector in the globals read by yylex(), then parse. */
+main(argc, argv) char **argv; {
+	Av = argv;
+	Ac = argc;
+	Argi = 1;	/* yylex() starts reading at Av[1] */
+	yyparse();
+}
+
+/*
+ * Parallel tables used by yylex(): operator[i] is the spelling of an
+ * operator on the command line and op[i] is the token returned for it.
+ * The empty string at the end of operator[] stops the scan in yylex();
+ * op[] has no entry for it.  "=" and "==" both map to EQ.
+ */
+char *operator[] = { "|", "&", "+", "-", "*", "/", "%", ":",
+	"=", "==", "<", "<=", ">", ">=", "!=",
+	"match", "substr", "length", "index", "\0" };
+int op[] = { OR, AND, ADD,  SUBT, MULT, DIV, REM, MCH,
+	EQ, EQ, LT, LEQ, GT, GEQ, NEQ,
+	MATCH, SUBSTR, LENGTH, INDEX };
+/*
+ * Lexer: every command-line argument is one token.  Returns NOARG once
+ * the arguments are used up, the character itself for an argument that
+ * starts with '(' or ')', the op[] token for an argument spelled like
+ * an entry of operator[], and otherwise A_STRING with yylval pointing
+ * at the argument.
+ */
+yylex() {
+	register char *arg;
+	register n;
+
+	if(Argi >= Ac)
+		return NOARG;
+
+	arg = Av[Argi];
+	Argi++;
+
+	if(*arg == '(' || *arg == ')')
+		return (int)*arg;
+
+	n = 0;
+	while(*operator[n]) {
+		if(EQL(operator[n], arg))
+			return op[n];
+		n++;
+	}
+
+	yylval = arg;
+	return A_STRING;
+}
+
+/*
+ * Relational operator `op' (EQ, GT, GEQ, LT, LEQ or NEQ) applied to r1
+ * and r2.  When both operands match the integer pattern they are
+ * compared as longs, otherwise with strcmp().  Returns the constant
+ * string "1" or "0".
+ */
+char *rel(op, r1, r2) register char *r1, *r2; {
+	register i;
+	long n1, n2;
+
+	if(ematch(r1, "-\\{0,1\\}[0-9]*$") && ematch(r2, "-\\{0,1\\}[0-9]*$")) {
+		/*
+		 * Compare instead of subtracting: the difference of two
+		 * longs can overflow, and would be truncated to int.
+		 */
+		n1 = atol(r1);
+		n2 = atol(r2);
+		i = (n1 > n2) - (n1 < n2);
+	} else
+		i = strcmp(r1, r2);
+	switch(op) {
+	case EQ: i = i==0; break;
+	case GT: i = i>0; break;
+	case GEQ: i = i>=0; break;
+	case LT: i = i<0; break;
+	case LEQ: i = i<=0; break;
+	case NEQ: i = i!=0; break;
+	}
+	return i? "1": "0";
+}
+
+/*
+ * Arithmetic operator `op' (ADD, SUBT, MULT, DIV or REM) applied to the
+ * integer operands r1 and r2.  Calls yyerror() for a non-numeric
+ * operand, for division or remainder by zero, and when the result
+ * cannot be allocated.  Returns the result as a malloc'd decimal
+ * string.
+ */
+char *arith(op, r1, r2) char *r1, *r2; {
+	long i1, i2;
+	register char *rv, *digits;
+
+	if(!(ematch(r1, "-\\{0,1\\}[0-9]*$") && ematch(r2, "-\\{0,1\\}[0-9]*$")))
+		yyerror("non-numeric argument");
+	i1 = atol(r1);
+	i2 = atol(r2);
+
+	/* dividing by zero is undefined; report it instead of trapping */
+	if((op == DIV || op == REM) && i2 == 0)
+		yyerror("division by zero");
+
+	switch(op) {
+	case ADD: i1 = i1 + i2; break;
+	case SUBT: i1 = i1 - i2; break;
+	case MULT: i1 = i1 * i2; break;
+	case DIV: i1 = i1 / i2; break;
+	case REM: i1 = i1 % i2; break;
+	}
+	/* size the copy from the text itself rather than a fixed guess */
+	digits = ltoa(i1);
+	rv = malloc(strlen(digits) + 1);
+	if(rv == 0)
+		yyerror("out of memory");
+	strcpy(rv, digits);
+	return rv;
+}
+/*
+ * Logical operator `op' (OR or AND).  An operand counts as null when it
+ * is "0" or the empty string.  OR yields r1 if r1 is not null, else r2
+ * if r2 is not null, else "0".  AND yields r1 when neither operand is
+ * null, else "0".
+ */
+char *conj(op, r1, r2) char *r1, *r2; {
+	register char *rv;
+	int null1, null2;
+
+	null1 = EQL(r1, "0") || EQL(r1, "");
+	null2 = EQL(r2, "0") || EQL(r2, "");
+
+	switch(op) {
+
+	case OR:
+		if(!null1)
+			rv = r1;
+		else if(!null2)
+			rv = r2;
+		else
+			rv = "0";
+		break;
+	case AND:
+		rv = (null1 || null2) ? "0" : r1;
+		break;
+	}
+	return rv;
+}
+
+/*
+ * substr v s w: the part of v that starts at 1-based position s and is
+ * at most w characters long.  v is cut in place and a pointer into it
+ * is returned.  A position below 1 or a length below 1 yields the
+ * empty string; without that check the counting loops below would run
+ * through the whole int range.
+ */
+char *substr(v, s, w) char *v, *s, *w; {
+register si, wi;
+register char *res;
+
+	si = atol(s);
+	wi = atol(w);
+	if(si <= 0 || wi <= 0)
+		return "";
+
+	/* skip the si-1 characters in front of the substring */
+	for(; si > 1 && *v; si--)
+		++v;
+
+	res = v;
+
+	/* keep at most wi characters */
+	for(; wi > 0 && *v; wi--)
+		++v;
+
+	*v = '\0';
+	return res;
+}
+
+/* length s: the number of characters in s, as a malloc'd decimal string. */
+char *length(s) register char *s; {
+	register char *rv;
+	register n;
+
+	for(n = 0; *s; s++)
+		n++;
+
+	rv = malloc(8);
+	strcpy(rv, ltoa((long)n));
+	return rv;
+}
+
+/*
+ * index s t: the 1-based position of the first character of s that
+ * also occurs in t, as a malloc'd decimal string, or the constant "0"
+ * when there is none.
+ */
+char *index(s, t) char *s, *t; {
+	register char *sp, *tp, *rv;
+
+	for(sp = s; *sp; sp++)
+		for(tp = t; *tp; tp++)
+			if(*sp == *tp) {
+				rv = malloc(8);
+				strcpy(rv, ltoa((long)(sp - s + 1)));
+				return rv;
+			}
+	return "0";
+}
+
+/*
+ * match s p (also `s : p'): run ematch() on string s with pattern p.
+ * When the pattern contains a \( \) group (nbra non-zero) the result is
+ * a malloc'd copy of Mstring[0]; otherwise it is the value returned by
+ * ematch() as a malloc'd decimal string.
+ *
+ * The parameters are declared explicitly: undeclared K&R parameters
+ * default to int, which cannot hold a pointer on targets where
+ * pointers are wider than int.
+ */
+char *match(s, p)
+char *s, *p;
+{
+	register char *rv;
+	long len;
+
+	len = ematch(s, p);
+	if(nbra) {
+		rv = malloc(strlen(Mstring[0])+1);
+		strcpy(rv, Mstring[0]);
+	} else {
+		rv = malloc(8);
+		strcpy(rv, ltoa(len));
+	}
+	return rv;
+}
+
+#define INIT	register char *sp = instring;
+#define GETC()		(*sp++)
+#define PEEKC()		(*sp)
+#define UNGETC(c)	(--sp)
+#define RETURN(c)	return
+#define ERROR(c)	errxx(c)
+
+
+/*
+ * Compile pattern p and match it against the start of s with
+ * advance().  Returns loc2-s (the distance from s to the end of the
+ * match) on success and 0 on failure.  When the pattern has exactly
+ * one \( \) group and the match succeeds, the text of the group is
+ * copied into Mstring[0].  More than one group, or a group that does
+ * not fit in Mstring[0], is reported through yyerror().
+ */
+ematch(s, p)
+char *s;
+register char *p;
+{
+	static char expbuf[ESIZE];
+	char *compile();
+	register num;
+	extern char *braslist[], *braelist[], *loc2;
+
+	compile(p, expbuf, &expbuf[ESIZE], 0);
+	if(nbra > 1)
+		yyerror("Too many '\\('s");
+	if(advance(s, expbuf)) {
+		if(nbra == 1) {
+			p = braslist[0];
+			num = braelist[0] - p;
+			/* Mstring[0] is a fixed array; never write past it */
+			if(num < 0 || num >= (int)sizeof Mstring[0])
+				yyerror("match too long");
+			strncpy(Mstring[0], p, num);
+			Mstring[0][num] = '\0';
+		}
+		return(loc2-s);
+	}
+	return(0);
+}
+
+/*
+ * Target of the error(c) and ERROR(c) macros defined in this file.
+ * The numeric code c is not reported; every failure goes to yyerror()
+ * as "RE error".
+ */
+errxx(c)
+{
+	yyerror("RE error");
+}
+
+#include  "regexp.h"
+/*
+ * Write "expr: " followed by the message s and a newline to file
+ * descriptor 2, then exit with status 2.
+ *
+ * s is declared explicitly: an undeclared K&R parameter defaults to
+ * int, which cannot hold a pointer on targets where pointers are
+ * wider than int.
+ */
+yyerror(s)
+char *s;
+{
+	write(2, "expr: ", 6);
+	prt(2, s);
+	exit(2);
+}
+/* Write the string s and then a newline to file descriptor fd. */
+prt(fd, s)
+char *s;
+{
+	int n;
+
+	n = strlen(s);
+	write(fd, s, n);
+	write(fd, "\n", 1);
+}
+/*
+ * Convert l to decimal text.  Returns a pointer into a static buffer
+ * that the next call overwrites.
+ *
+ * The digits are produced from the unsigned magnitude so the most
+ * negative long does not overflow on negation, and the buffer has room
+ * for a sign, 20 digits and the terminating NUL.
+ */
+char *ltoa(l)
+long l;
+{
+	static char str[24];
+	register char *sp = &str[sizeof str - 1];
+	unsigned long u;
+
+	u = l;
+	if(l < 0)
+		u = 0 - u;	/* magnitude, computed without signed overflow */
+	*sp = '\0';
+	do {
+		*--sp = '0' + u % 10;
+		u /= 10;
+	} while(u);
+	if(l < 0)
+		*--sp = '-';
+	return sp;
+}

+ 14 - 0
sys/src/ape/cmd/expr/mkfile

@@ -0,0 +1,14 @@
+MKSHELL=rc
+APE=$NXM/sys/src/ape
+<$APE/config
+
+TARG=expr
+OFILES=y.tab.$O
+YFILES=expr.y
+HFILES=regexp.h
+
+BIN=$APEBIN
+<$NXM/sys/src/cmd/mkone
+
+YFLAGS=-S
+CFLAGS=-B -c $CFLAGS

+ 410 - 0
sys/src/ape/cmd/expr/regexp.h

@@ -0,0 +1,410 @@
+#define	CBRA	2
+#define	CCHR	4
+#define	CDOT	8
+#define	CCL	12
+#define	CDOL	20
+#define	CEOF	22
+#define	CKET	24
+#define	CBACK	36
+
+#define	STAR	01
+#define RNGE	03
+
+#define	NBRA	9
+
+#define PLACE(c)	ep[c >> 3] |= bittab[c & 07]
+#define ISTHERE(c)	(ep[c >> 3] & bittab[c & 07])
+
+char	*braslist[NBRA];
+char	*braelist[NBRA];
+int	nbra, ebra;
+char *loc1, *loc2, *locs;
+int	sed;
+
+int	circf;
+int	low;
+int	size;
+
+char	bittab[] = {
+	1,
+	2,
+	4,
+	8,
+	16,
+	32,
+	64,
+	128
+};
+
+/*
+ * Translate the regular expression text `instring' into compiled form
+ * in the buffer that starts at `ep' and is bounded by `endbuf'; `seof'
+ * is the character that ends the expression.
+ *
+ * INIT, GETC, PEEKC, UNGETC, RETURN and ERROR are macros that the
+ * including file has to define before it includes this header.
+ *
+ * For a non-empty expression circf, nbra and ebra are reset; circf is
+ * then set when the expression starts with '^'.
+ *
+ * NOTE(review): several ERROR() calls are followed by code that only
+ * makes sense if ERROR() does not return (e.g. case '\n' runs on into
+ * case '*') -- confirm against the includer's definition.
+ */
+char *
+compile(instring, ep, endbuf, seof)
+register char *ep;
+char *instring, *endbuf;
+{
+	INIT	/* Dependent declarations and initializations */
+	register c;
+	register eof = seof;
+	char *lastep = instring;
+	int cclcnt;
+	char bracket[NBRA], *bracketp;
+	int closed;
+	char neg;
+	int lc;
+	int i, cflg;
+
+	lastep = 0;
+	/* empty expression: nothing is compiled */
+	if((c = GETC()) == eof) {
+		if(*ep == 0 && !sed)
+			ERROR(41);
+		RETURN(ep);
+	}
+	bracketp = bracket;
+	circf = closed = nbra = ebra = 0;
+	if (c == '^')
+		circf++;
+	else
+		UNGETC(c);
+	for (;;) {
+		if (ep >= endbuf)
+			ERROR(50);
+		/*
+		 * lastep marks the start of the item a following `*' or
+		 * `\{' applies to, so it is left alone when the current
+		 * token is one of those two.
+		 */
+		if((c = GETC()) != '*' && ((c != '\\') || (PEEKC() != '{')))
+			lastep = ep;
+		if (c == eof) {
+			*ep++ = CEOF;
+			RETURN(ep);
+		}
+		switch (c) {
+
+		case '.':
+			*ep++ = CDOT;
+			continue;
+
+		case '\n':
+			ERROR(36);
+		case '*':
+			/* a `*' with nothing to repeat is an ordinary character */
+			if (lastep==0 || *lastep==CBRA || *lastep==CKET)
+				goto defchar;
+			*lastep |= STAR;
+			continue;
+
+		case '$':
+			/* `$' is only special as the last character */
+			if(PEEKC() != eof)
+				goto defchar;
+			*ep++ = CDOL;
+			continue;
+
+		case '[':
+			/* a class is CCL followed by a 16-byte bitmap filled in by PLACE */
+			if(&ep[17] >= endbuf)
+				ERROR(50);
+
+			*ep++ = CCL;
+			lc = 0;
+			for(i = 0; i < 16; i++)
+				ep[i] = 0;
+
+			neg = 0;
+			if((c = GETC()) == '^') {
+				neg = 1;
+				c = GETC();
+			}
+
+			do {
+				if(c == '\0' || c == '\n')
+					ERROR(49);
+				/* `a-z' range: lc is the previous character */
+				if(c == '-' && lc != 0) {
+					if ((c = GETC()) == ']') {
+						PLACE('-');
+						break;
+					}
+					while(lc < c) {
+						PLACE(lc);
+						lc++;
+					}
+				}
+				lc = c;
+				PLACE(c);
+			} while((c = GETC()) != ']');
+			if(neg) {
+				/* invert the bitmap, then clear the bit for NUL */
+				for(cclcnt = 0; cclcnt < 16; cclcnt++)
+					ep[cclcnt] ^= -1;
+				ep[0] &= 0376;
+			}
+
+			ep += 16;
+
+			continue;
+
+		case '\\':
+			switch(c = GETC()) {
+
+			case '(':
+				if(nbra >= NBRA)
+					ERROR(43);
+				*bracketp++ = nbra;
+				*ep++ = CBRA;
+				*ep++ = nbra++;
+				continue;
+
+			case ')':
+				if(bracketp <= bracket || ++ebra != nbra)
+					ERROR(42);
+				*ep++ = CKET;
+				*ep++ = *--bracketp;
+				closed++;
+				continue;
+
+			case '{':
+				/*
+				 * \{m\}, \{m,\} or \{m,n\}: the item at lastep is
+				 * flagged RNGE and two count bytes are appended;
+				 * 255 as the second byte marks the \{m,\} form.
+				 */
+				if(lastep == (char *) (0))
+					goto defchar;
+				*lastep |= RNGE;
+				cflg = 0;
+			nlim:
+				c = GETC();
+				i = 0;
+				do {
+					if ('0' <= c && c <= '9')
+						i = 10 * i + c - '0';
+					else
+						ERROR(16);
+				} while(((c = GETC()) != '\\') && (c != ','));
+				if (i > 255)
+					ERROR(11);
+				*ep++ = i;
+				if (c == ',') {
+					if(cflg++)
+						ERROR(44);
+					if((c = GETC()) == '\\')
+						*ep++ = 255;
+					else {
+						UNGETC(c);
+						goto nlim; /* get 2'nd number */
+					}
+				}
+				if(GETC() != '}')
+					ERROR(45);
+				if(!cflg)	/* one number */
+					*ep++ = i;
+				else if((ep[-1] & 0377) < (ep[-2] & 0377))
+					ERROR(46);
+				continue;
+
+			case '\n':
+				ERROR(36);
+
+			case 'n':
+				c = '\n';
+				goto defchar;
+
+			default:
+				/* \1 .. \9: back-reference to a group that is already closed */
+				if(c >= '1' && c <= '9') {
+					if((c -= '1') >= closed)
+						ERROR(25);
+					*ep++ = CBACK;
+					*ep++ = c;
+					continue;
+				}
+			}
+			/* Drop through to default to use \ to turn off special chars */
+
+		defchar:
+		default:
+			lastep = ep;
+			*ep++ = CCHR;
+			*ep++ = c;
+		}
+	}
+}
+
+/*
+ * Search the string p1 for a match of the compiled expression p2.
+ * With circf set only the start of p1 is tried.  Otherwise every
+ * position up to and including the terminating NUL is tried in turn;
+ * when the expression begins with a literal character (CCHR), positions
+ * that do not hold that character are skipped without calling
+ * advance().  Returns 1 with loc1 set to the start of the match, or 0.
+ */
+step(p1, p2)
+register char *p1, *p2;
+{
+	register first;
+	register literal;
+
+	if (circf) {
+		loc1 = p1;
+		return(advance(p1, p2));
+	}
+
+	literal = (*p2 == CCHR);
+	first = literal ? p2[1] : 0;
+
+	for (;;) {
+		if ((!literal || *p1 == first) && advance(p1, p2)) {
+			loc1 = p1;
+			return(1);
+		}
+		if (*p1++ == 0)
+			return(0);
+	}
+}
+
+/*
+ * Try to match the compiled expression `ep' against the text starting
+ * exactly at `lp'.  Returns 1 on a match, with loc2 pointing just past
+ * the matched text, and 0 otherwise.  braslist[] and braelist[] are
+ * filled with the start and end of each \( \) group as the matcher
+ * passes it.
+ *
+ * Repeated items (STAR and RNGE forms) first consume as much text as
+ * they can and then back off one character at a time at `star',
+ * retrying the rest of the expression recursively.
+ */
+advance(lp, ep)
+register char *lp, *ep;
+{
+	register char *curlp;
+	char c;
+	char *bbeg;
+	int ct;
+
+	for (;;) switch (*ep++) {
+
+	case CCHR:
+		if (*ep++ == *lp++)
+			continue;
+		return(0);
+
+	case CDOT:
+		if (*lp++)
+			continue;
+		return(0);
+
+	case CDOL:
+		if (*lp==0)
+			continue;
+		return(0);
+
+	case CEOF:
+		loc2 = lp;
+		return(1);
+
+	case CCL:
+		/* the text character is masked to 7 bits before the bitmap lookup */
+		c = *lp++ & 0177;
+		if(ISTHERE(c)) {
+			ep += 16;
+			continue;
+		}
+		return(0);
+	case CBRA:
+		braslist[*ep++] = lp;
+		continue;
+
+	case CKET:
+		braelist[*ep++] = lp;
+		continue;
+
+	case CCHR|RNGE:
+		/* getrnge() sets low (required count) and size (optional extra count) */
+		c = *ep++;
+		getrnge(ep);
+		while(low--)
+			if(*lp++ != c)
+				return(0);
+		curlp = lp;
+		while(size--) 
+			if(*lp++ != c)
+				break;
+		if(size < 0)
+			lp++;
+		ep += 2;
+		goto star;
+
+	case CDOT|RNGE:
+		getrnge(ep);
+		while(low--)
+			if(*lp++ == '\0')
+				return(0);
+		curlp = lp;
+		while(size--)
+			if(*lp++ == '\0')
+				break;
+		if(size < 0)
+			lp++;
+		ep += 2;
+		goto star;
+
+	case CCL|RNGE:
+		/* the two count bytes follow the 16-byte bitmap */
+		getrnge(ep + 16);
+		while(low--) {
+			c = *lp++ & 0177;
+			if(!ISTHERE(c))
+				return(0);
+		}
+		curlp = lp;
+		while(size--) {
+			c = *lp++ & 0177;
+			if(!ISTHERE(c))
+				break;
+		}
+		if(size < 0)
+			lp++;
+		ep += 18;		/* 16 + 2 */
+		goto star;
+
+	case CBACK:
+		/* back-reference: the text must repeat what the group matched */
+		bbeg = braslist[*ep];
+		ct = braelist[*ep++] - bbeg;
+
+		if(ecmp(bbeg, lp, ct)) {
+			lp += ct;
+			continue;
+		}
+		return(0);
+
+	case CBACK|STAR:
+		bbeg = braslist[*ep];
+		ct = braelist[*ep++] - bbeg;
+		curlp = lp;
+		while(ecmp(bbeg, lp, ct))
+			lp += ct;
+
+		/* back off one whole repetition at a time */
+		while(lp >= curlp) {
+			if(advance(lp, ep))	return(1);
+			lp -= ct;
+		}
+		return(0);
+
+
+	case CDOT|STAR:
+		curlp = lp;
+		while (*lp++);
+		goto star;
+
+	case CCHR|STAR:
+		curlp = lp;
+		while (*lp++ == *ep);
+		ep++;
+		goto star;
+
+	case CCL|STAR:
+		curlp = lp;
+		do {
+			c = *lp++ & 0177;
+		} while(ISTHERE(c));
+		ep += 16;
+		goto star;
+
+	star:
+		/*
+		 * lp is one past the first character that failed; step back
+		 * towards curlp, trying the rest of the expression each time.
+		 */
+		do {
+			if(--lp == locs)
+				break;
+			if (advance(lp, ep))
+				return(1);
+		} while (lp > curlp);
+		return(0);
+
+	}
+}
+
+/*
+ * Decode the two count bytes of a \{m,n\} item at str into the globals
+ * `low' (the required count m) and `size' (how many further
+ * repetitions are allowed).  A second byte of 255 marks the open-ended
+ * \{m,\} form and gives size 20000.
+ *
+ * Both bytes are masked to 8 bits before use: comparing a plain char
+ * with 255 never succeeds where char is signed.
+ */
+getrnge(str)
+register char *str;
+{
+	register hi;
+
+	low = str[0] & 0377;
+	hi = str[1] & 0377;
+	size = hi == 255 ? 20000 : hi - low;
+}
+
+/*
+ * Compare the first `count' characters of a and b.
+ * Returns 1 when they are all equal (also when count is 0), else 0.
+ */
+ecmp(a, b, count)
+register char	*a, *b;
+register	count;
+{
+	for(; count != 0; count--) {
+		if(*a != *b)
+			return(0);
+		a++;
+		b++;
+	}
+	return(1);
+}

+ 75 - 0
sys/src/ape/cmd/kill.c

@@ -0,0 +1,75 @@
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <signal.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#define NSIG SIGUSR2
+
+/*
+ * Signal names indexed by signal number; slot 0 is unused.
+ * main() accepts a name either in full or without the leading "SIG".
+ */
+char *signm[NSIG+1] = { 0,
+"SIGHUP", "SIGINT", "SIGQUIT", "SIGILL", "SIGABRT", "SIGFPE", "SIGKILL", /* 1-7 */
+"SIGSEGV", "SIGPIPE", "SIGALRM", "SIGTERM", "SIGUSR1", "SIGUSR2", /* 8-13 */
+};
+
+/*
+ * kill [ -sig ] pid ...
+ * kill -l
+ *
+ * Send signal `sig' (default SIGTERM) to each pid.  `sig' is a number
+ * or a name from signm[], with or without the leading "SIG".  -l lists
+ * the known names.
+ * Exit status: 0 when every kill() succeeded, 1 when one failed or the
+ * signal was not recognised, 2 for a usage error (including a signal
+ * given without any pid).
+ */
+int
+main(int argc, char **argv)
+{
+	int signo, pid;
+	int errlev;
+
+	errlev = 0;
+	if (argc <= 1) {
+	usage:
+		fprintf(stderr, "usage: kill [ -sig ] pid ...\n");
+		fprintf(stderr, "for a list of signals: kill -l\n");
+		exit(2);
+	}
+	if (*argv[1] == '-') {
+		if (argv[1][1] == 'l') {
+			int i = 0;
+			for (signo = 1; signo <= NSIG; signo++)
+				if (signm[signo]) {
+					printf("%s ", signm[signo]);
+					if (++i%8 == 0)
+						printf("\n");
+				}
+			if(i%8 !=0)
+				printf("\n");
+			exit(0);
+		} else if (isdigit((unsigned char)argv[1][1])) {
+			signo = atoi(argv[1]+1);
+			if (signo < 0 || signo > NSIG) {
+				fprintf(stderr, "kill: %s: number out of range\n",
+				    argv[1]);
+				exit(1);
+			}
+		} else {
+			char *name = argv[1]+1;
+			for (signo = 1; signo <= NSIG; signo++)
+				if (signm[signo] && (
+				    !strcmp(signm[signo], name)||
+				    !strcmp(signm[signo]+3, name)))
+					goto foundsig;
+			fprintf(stderr, "kill: %s: unknown signal; kill -l lists signals\n", name);
+			exit(1);
+foundsig:
+			;
+		}
+		argc--;
+		argv++;
+		/* a signal with no pid to send it to is a usage error */
+		if (argc <= 1)
+			goto usage;
+	} else
+		signo = SIGTERM;
+	argv++;
+	while (argc > 1) {
+		if ((**argv<'0' || **argv>'9') && **argv!='-')
+			goto usage;
+		pid = atoi(*argv);
+		if (kill(pid, signo) < 0) {
+			perror("kill");
+			/* remember the failure for the exit status */
+			errlev = 1;
+		}
+		argc--;
+		argv++;
+	}
+	return(errlev);
+}

+ 213 - 0
sys/src/ape/cmd/make/defs.h

@@ -0,0 +1,213 @@
+/* defs 4.2 85/10/28 */
+#define _POSIX_SOURCE
+#define _RESEARCH_SOURCE
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <signal.h>
+#include <time.h>
+#include <dirent.h>
+#include <limits.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#ifndef SHELLCOM
+#define SHELLCOM "/bin/sh"
+#endif
+
+typedef char flag;	/* represent a few bit flag */
+
+#define NO	0
+#define YES	1
+
+#define equal(a,b)	(! strcmp(a,b))
+#define HASHSIZE	1021
+#define NLEFTS	512
+#define NCHARS	500
+#define NINTS	250
+#define INMAX	20000
+#define OUTMAX	20000
+#define QBUFMAX	20000
+#define MAXDIR	10
+#define MAXPROC	100
+#define MAXINCLUDE	17
+#define PROCLIMIT	3
+
+#define ALLDEPS	1
+#define SOMEDEPS	2
+
+#define META	01
+#define TERMINAL	02
+extern char funny[128];
+
+
+#define ALLOC(x) (struct x *) ckalloc(sizeof(struct x))
+#define CHNULL	(char *) NULL
+
+extern int sigivalue;
+extern int sigqvalue;
+extern int dbgflag;
+extern int prtrflag;
+extern int silflag;
+extern int noexflag;
+extern int keepgoing;
+extern int noruleflag;
+extern int touchflag;
+extern int questflag;
+extern int oldflag;
+extern int ndocoms;
+extern int ignerr;
+extern int okdel;
+extern int forceshell;
+extern int inarglist;
+extern char **envpp;	/* points to slot in environment vector */
+extern char *prompt;
+extern int nopdir;
+
+typedef struct nameblock *nameblkp;
+typedef struct depblock *depblkp;
+typedef struct lineblock *lineblkp;
+typedef struct chain *chainp;
+
+/* One entry per name (target or dependent) known to make. */
+struct nameblock
+	{
+	nameblkp nxtnameblock;	/* next entry in a list of nameblocks */
+	char *namep;		/* the name itself */
+	lineblkp linep;		/* list of dependency lines for this name */
+	flag done;		/* 0 not looked at yet, 1 being updated, 2 up to date, 3 make failed */
+	flag septype;		/* compared with SOMEDEPS by doname(); 0 there means no dependency line was seen */
+	flag isarch;
+	flag isdir;
+	time_t modtime;		/* modification time recorded by doname() */
+	};
+
+extern nameblkp mainname;
+extern nameblkp firstname;
+extern nameblkp *hashtab;
+extern int nhashed;
+extern int hashsize;
+extern int hashthresh;
+
+struct lineblock
+	{
+	lineblkp nxtlineblock;
+	struct depblock *depp;
+	struct shblock *shp;
+	};
+extern lineblkp sufflist;
+
+struct depblock
+	{
+	depblkp nxtdepblock;
+	nameblkp depname;
+	char nowait;
+	} ;
+
+struct shblock
+	{
+	struct shblock *nxtshblock;
+	char *shbp;
+	};
+
+struct varblock
+	{
+	struct varblock *nxtvarblock;
+	char *varname;
+	char *varval;
+	char **export;
+	flag noreset;
+	flag used;
+	};
+extern struct varblock *firstvar;
+
+struct pattern
+	{
+	struct pattern *nxtpattern;
+	char *patval;
+	};
+extern struct pattern *firstpat;
+
+struct dirhd
+	{
+	struct dirhd *nxtdirhd;
+	time_t dirtime;
+	int dirok;
+	DIR * dirfc;
+	char *dirn;
+	};
+extern struct dirhd *firstod;
+
+
+struct chain
+	{
+	chainp nextp;
+	char *datap;
+	};
+
+struct wild
+	{
+	struct wild *next;
+	lineblkp linep;
+	char *left;
+	char *right;
+	int llen;
+	int rlen;
+	int totlen;
+	};
+
+typedef struct wild *wildp;
+extern wildp firstwild;
+extern wildp lastwild;
+
+
+/* data for processes */
+extern int proclimit;	/* maximum spawned processes allowed alive at one time */
+extern int proclive;	/* number of spawned processes awaited */
+extern int nproc;	/* next slot in process stack to use */
+extern struct process
+	{
+	int pid;
+	flag nohalt;
+	flag nowait;
+	flag done;
+	} procstack[ ];
+
+extern void	intrupt(int);
+extern void	enbint(void (*)(int));
+extern int	doname(nameblkp, int, time_t *, int);
+extern int	docom(struct shblock *, int, int);
+extern int	dosys(char *, int, int, char *);
+extern int	waitstack(int);
+extern void	touch(int, char*);
+extern time_t	exists(char *);
+extern time_t	prestime(void);
+extern depblkp	srchdir(char*, int, depblkp);
+extern time_t	lookarch(char *);
+extern void	dirsrch(char *);
+extern void	baddirs(void);
+extern nameblkp	srchname(char *);
+extern nameblkp	makename(char *);
+extern int	hasparen(char *);
+extern void	newhash(int);
+extern nameblkp	chkname(char *);
+extern char	*copys(char *);
+extern char	*concat(char *, char *, char *);
+extern int	suffix(char *, char *, char *);
+extern int	*ckalloc(int);
+extern char	*subst(char *, char *);
+extern void	setvar(char *, char *, int);
+extern void	set3var(char *, char *);
+extern int	eqsign(char *);
+extern struct varblock *varptr(char *);
+extern int	dynmacro(char *);
+extern void	fatal1(char *, char *);
+extern void	fatal(char *);
+extern chainp	appendq(chainp, char *);
+extern char	*mkqlist(chainp, char *);
+extern wildp	iswild(char *);
+extern char	*wildmatch(wildp, char *, int);
+extern char	*wildsub(char *, char *);
+extern int	parse(char *);
+extern int	yylex(void);

+ 380 - 0
sys/src/ape/cmd/make/doname.c

@@ -0,0 +1,380 @@
+#include "defs.h"
+
+static int docom1(char *, int, int, int, int);
+static void expand(depblkp);
+
+/*  BASIC PROCEDURE.  RECURSIVE.  */
+
+/*
+p->done = 0   don't know what to do yet
+p->done = 1   file in process of being updated
+p->done = 2   file already exists in current state
+p->done = 3   file make failed
+*/
+
+/*
+ * Bring the name `p' up to date.
+ *   reclevel  recursion depth; 0 for a top-level call, which is the
+ *             only level that prints the "up to date" / "not remade"
+ *             messages and honours questflag
+ *   tval      receives the modification time recorded for p
+ *   nowait    handed on to docom()
+ * Returns the accumulated error count, 0 when nothing failed.
+ * A null p yields time 0 and no error.
+ */
+int
+doname(nameblkp p, int reclevel, time_t *tval, int nowait)
+{
+int errstat;
+int okdel1;
+int didwork;
+int len;
+time_t td, td1, tdep, ptime, ptime1;
+depblkp q;
+depblkp qtemp, suffp, suffp1;
+nameblkp p1, p2;
+struct shblock *implcom, *explcom;
+lineblkp lp;
+lineblkp lp1, lp2;
+/* NOTE(review): fixed-size scratch buffers handed to concat() and suffix();
+   presumably those write without a length limit -- confirm that long
+   names cannot overflow them */
+char sourcename[100], prefix[100], temp[100], concsuff[20];
+char *stem;
+char *pnamep, *p1namep;
+chainp allchain, qchain;
+char qbuf[QBUFMAX], tgsbuf[QBUFMAX];
+wildp wp;
+int nproc1;
+char *lastslash, *s;
+
+if(p == 0)
+	{
+	*tval = 0;
+	return 0;
+	}
+
+if(dbgflag)
+	{
+	printf("doname(%s,%d)\n",p->namep,reclevel);
+	fflush(stdout);
+	}
+
+/* already handled, or currently being handled further up the recursion */
+if(p->done > 0)
+	{
+	*tval = p->modtime;
+	return (p->done == 3);
+	}
+
+errstat = 0;
+tdep = 0;
+implcom = 0;
+explcom = 0;
+ptime = exists(p->namep);
+ptime1 = 0;
+didwork = NO;
+p->done = 1;	/* avoid infinite loops */
+nproc1 = nproc;	/* current depth of process stack */
+
+qchain = NULL;
+allchain = NULL;
+
+/* define values of Bradford's $$@ and $$/ macros */
+for(s = lastslash = p->namep; *s; ++s)
+	if(*s == '/')
+		lastslash = s;
+setvar("$@", p->namep, YES);
+setvar("$/", lastslash, YES);
+
+
+/* expand any names that have embedded metacharacters */
+
+for(lp = p->linep ; lp ; lp = lp->nxtlineblock)
+	for(q = lp->depp ; q ; q=qtemp )
+		{
+		/* saved first: expand() may splice new blocks in after q */
+		qtemp = q->nxtdepblock;
+		expand(q);
+		}
+
+/* make sure all dependents are up to date */
+
+for(lp = p->linep ; lp ; lp = lp->nxtlineblock)
+	{
+	td = 0;
+	for(q = lp->depp ; q ; q = q->nxtdepblock)
+		if(q->depname)
+			{
+			errstat += doname(q->depname, reclevel+1, &td1, q->nowait);
+			if(dbgflag)
+				printf("TIME(%s)=%ld\n",q->depname->namep, td1);
+			if(td1 > td)
+				td = td1;
+			/* qchain: dependents newer than p; allchain: all of them */
+			if(ptime < td1)
+				qchain = appendq(qchain, q->depname->namep);
+			allchain = appendq(allchain, q->depname->namep);
+			}
+	if(p->septype == SOMEDEPS)
+		{
+		/* each dependency line runs its own commands when it is out of date */
+		if(lp->shp)
+		     if( ptime<td || (ptime==0 && td==0) || lp->depp==0)
+			{
+			okdel1 = okdel;
+			okdel = NO;
+			set3var("@", p->namep);
+			setvar("?", mkqlist(qchain,qbuf), YES);
+			setvar("^", mkqlist(allchain,tgsbuf), YES);
+			qchain = NULL;
+			if( !questflag )
+				errstat += docom(lp->shp, nowait, nproc1);
+			set3var("@", CHNULL);
+			okdel = okdel1;
+			ptime1 = prestime();
+			didwork = YES;
+			}
+		}
+
+	else	{
+		/* at most one dependency line may carry commands */
+		if(lp->shp != 0)
+			{
+			if(explcom)
+				fprintf(stderr, "Too many command lines for `%s'\n",
+					p->namep);
+			else	explcom = lp->shp;
+			}
+
+		if(td > tdep) tdep = td;
+		}
+	}
+
+
+
+/* Look for implicit dependents, using suffix rules */
+
+for(lp = sufflist ; lp ; lp = lp->nxtlineblock)
+    for(suffp = lp->depp ; suffp ; suffp = suffp->nxtdepblock)
+	{
+	pnamep = suffp->depname->namep;
+	if(suffix(p->namep , pnamep , prefix))
+		{
+		(void)srchdir(concat(prefix,"*",temp), NO, (depblkp) NULL);
+		for(lp1 = sufflist ; lp1 ; lp1 = lp1->nxtlineblock)
+		    for(suffp1=lp1->depp; suffp1 ; suffp1 = suffp1->nxtdepblock)
+			{
+			p1namep = suffp1->depname->namep;
+			/* need both a rule named <src-suffix><suffix> and a known source <prefix><src-suffix> */
+			if( (p1=srchname(concat(p1namep, pnamep ,concsuff))) &&
+			    (p2=srchname(concat(prefix, p1namep ,sourcename))) )
+				{
+				errstat += doname(p2, reclevel+1, &td, NO);
+				if(ptime < td)
+					qchain = appendq(qchain, p2->namep);
+if(dbgflag) printf("TIME(%s)=%ld\n", p2->namep, td);
+				if(td > tdep) tdep = td;
+				set3var("*", prefix);
+				set3var("<", copys(sourcename));
+				for(lp2=p1->linep ; lp2 ; lp2 = lp2->nxtlineblock)
+					if(implcom = lp2->shp) break;
+				goto endloop;
+				}
+			}
+		}
+	}
+
+/* Look for implicit dependents, using pattern matching rules */
+
+len = strlen(p->namep);
+for(wp = firstwild ; wp ; wp = wp->next)
+	if(stem = wildmatch(wp, p->namep, len) )
+		{
+		lp = wp->linep;
+		/* first pass: every dependent of the rule must be a known name */
+		for(q = lp->depp; q; q = q->nxtdepblock)
+			{
+			if(dbgflag>1 && q->depname)
+				fprintf(stderr,"check dep of %s on %s\n", p->namep,
+					wildsub(q->depname->namep,stem));
+			if(q->depname &&
+				! chkname(wildsub(q->depname->namep,stem)))
+					break;
+			}
+
+		if(q)	/* some name not found, go to next line */
+			continue;
+
+		/* second pass: bring each dependent up to date */
+		for(q = lp->depp; q; q = q->nxtdepblock)
+			{
+			nameblkp tamep;
+			if(q->depname == NULL)
+				continue;
+			tamep = srchname( wildsub(q->depname->namep,stem));
+/*TEMP fprintf(stderr,"check dep %s on %s =>%s\n",p->namep,q->depname->namep,tamep->namep);*/
+/*TEMP*/if(dbgflag) printf("%s depends on %s. stem=%s\n", p->namep,tamep->namep, stem);
+			errstat += doname(tamep, reclevel+1, &td, q->nowait);
+			if(ptime < td)
+				qchain = appendq(qchain, tamep->namep);
+			allchain = appendq(allchain, tamep->namep);
+			if(dbgflag) printf("TIME(%s)=%ld\n", tamep->namep, td);
+			if(td > tdep)
+				tdep = td;
+			set3var("<", copys(tamep->namep) );
+			}
+		set3var("*", stem);
+		setvar("%", stem, YES);
+		implcom = lp->shp;
+		goto endloop;
+		}
+
+endloop:
+
+
+/* out of date (or missing with no dated dependents): run the commands */
+if(errstat==0 && (ptime<tdep || (ptime==0 && tdep==0) ) )
+	{
+	ptime = (tdep>0 ? tdep : prestime() );
+	set3var("@", p->namep);
+	setvar("?", mkqlist(qchain,qbuf), YES);
+	setvar("^", mkqlist(allchain,tgsbuf), YES);
+	/* explicit commands win over implicit ones, then .DEFAULT */
+	if(explcom)
+		errstat += docom(explcom, nowait, nproc1);
+	else if(implcom)
+		errstat += docom(implcom, nowait, nproc1);
+	else if(p->septype == 0)
+		if(p1=srchname(".DEFAULT"))
+			{
+			set3var("<", p->namep);
+			for(lp2 = p1->linep ; lp2 ; lp2 = lp2->nxtlineblock)
+				if(implcom = lp2->shp)
+					{
+					errstat += docom(implcom, nowait,nproc1);
+					break;
+					}
+			}
+		else if(keepgoing)
+			{
+			printf("Don't know how to make %s\n", p->namep);
+			++errstat;
+			}
+		else
+			fatal1(" Don't know how to make %s", p->namep);
+
+	set3var("@", CHNULL);
+	if(noexflag || nowait || (ptime = exists(p->namep)) == 0 )
+		ptime = prestime();
+	}
+
+else if(errstat!=0 && reclevel==0)
+	printf("`%s' not remade because of errors\n", p->namep);
+
+else if(!questflag && reclevel==0  &&  didwork==NO)
+	printf("`%s' is up to date.\n", p->namep);
+
+if(questflag && reclevel==0)
+	exit(ndocoms>0 ? -1 : 0);
+
+p->done = (errstat ? 3 : 2);
+if(ptime1 > ptime)
+	ptime = ptime1;
+p->modtime = ptime;
+*tval = ptime;
+return errstat;
+}
+
+/*
+ * Run the list of command lines starting at q.
+ * With questflag set nothing is run.  With touchflag set the value of
+ * the "@" variable is touched instead of running the commands.
+ * Otherwise each line is macro-substituted, its leading modifiers are
+ * consumed ('-' ignore errors, '@' do not print, '+' run even when
+ * noexflag is set) and the rest goes to docom1().  `nowait' only
+ * applies to the last line of the list.
+ * Returns YES at the first failing line whose errors are not ignored,
+ * else NO.
+ */
+docom(struct shblock *q, int nowait, int nproc1)
+{
+struct shblock *cmd;
+char *target, *text;
+int ign, nopr, doit, islast;
+char string[OUTMAX];
+
+++ndocoms;
+if(questflag)
+	return NO;
+
+if(touchflag)
+	{
+	target = varptr("@")->varval;
+	if(!silflag)
+		printf("touch(%s)\n", target);
+	if(!noexflag)
+		touch(YES, target);
+	return NO;
+	}
+
+if(nproc1 < nproc)
+	waitstack(nproc1);
+
+for(cmd = q ; cmd ; cmd = cmd->nxtshblock)
+	{
+	subst(cmd->shbp, string);
+	ign = ignerr;
+	nopr = doit = NO;
+
+	/* strip the leading modifier characters */
+	text = string;
+	while(*text == '-' || *text == '@' || *text == '+')
+		{
+		if(*text == '-')
+			ign = YES;
+		else if(*text == '@')
+			nopr = YES;
+		else
+			doit = YES;
+		++text;
+		}
+
+	islast = (cmd->nxtshblock == 0);
+	if( docom1(text, ign, nopr, doit || !noexflag, nowait && islast) && !ign)
+		return YES;
+	}
+return NO;
+}
+
+
+/*
+ * Handle one command line.  An empty line does nothing.  When the line
+ * is taken by dynmacro(), or `doit' is false, the line is only echoed
+ * (if echoing is enabled) and 0 is returned.  Otherwise the line is
+ * run through dosys(), baddirs() is called, and the dosys() status is
+ * returned.
+ */
+static int
+docom1(char *comstring, int nohalt, int noprint, int doit, int nowait)
+{
+char *lead;
+int rc;
+
+if(*comstring == '\0')
+	return 0;
+
+/* lead is the text printed in front of an echoed command; null means no echo */
+lead = CHNULL;
+if(!silflag && !(noprint && doit))
+	lead = doit ? prompt : "";
+
+if(!dynmacro(comstring) && doit)
+	{
+	rc = dosys(comstring, nohalt, nowait, lead);
+	baddirs();	/* directories may have changed */
+	return rc;
+	}
+
+if(lead != CHNULL)
+	{
+	fputs(lead, stdout);
+	puts(comstring);	/* with a newline */
+	fflush(stdout);
+	}
+return 0;
+}
+
+
+/*
+   If there are any Shell meta characters ('*', '?' or '[') in the
+   name of dependency block q, search the directory and, when
+   srchdir() returns a list, link that list in after q and clear
+   q->depname.
+
+   A block whose depname is already null is left alone: this function
+   clears depname itself, and every other walk over dependency blocks
+   in this file checks for that.
+*/
+
+static void
+expand(depblkp q)
+{
+char *s;
+char *s1;
+depblkp p;
+
+if(q->depname == 0)
+	return;
+
+s1 = q->depname->namep;
+for(s=s1 ; ;) switch(*s++)
+	{
+	case '\0':
+		return;
+
+	case '*':
+	case '?':
+	case '[':
+		if( p = srchdir(s1 , YES, q->nxtdepblock) )
+			{
+			q->nxtdepblock = p;
+			q->depname = 0;
+			}
+		return;
+	}
+}

+ 288 - 0
sys/src/ape/cmd/make/dosys.c

@@ -0,0 +1,288 @@
+#include "defs.h"
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+static int	metas(char *);	/* nonzero if the string has a Shell meta-character */
+static int	waitproc(int *);	/* reap one child, return its procstack index */
+static int	doshell(char *, int);	/* fork and run the command through SHELLCOM */
+static int	doexec(char *);	/* fork and execvp the command directly */
+/*
+   Start comstring as a child process and push it on procstack.
+   If prefix is non-null, prefix and command are echoed first.  The
+   command goes through doshell when forceshell is set or metas finds a
+   meta-character, otherwise through doexec.  With nowait the pid is
+   printed and 0 is returned at once; otherwise the result of
+   waitstack for the new entry is returned.
+   NOTE(review): doexec also returns -1 for a blank command, which is
+   reported here as "fork failed".
+*/
+int
+dosys(char *comstring, int nohalt, int nowait, char *prefix)
+{
+int status;
+struct process *procp;
+
+/* make sure there is room in the process stack */
+if(nproc >= MAXPROC)
+	waitstack(MAXPROC-1);
+
+/* make sure fewer than proclimit processes are running */
+while(proclive >= proclimit)
+	{
+	enbint(SIG_IGN);
+	waitproc(&status);	/* status is not examined here */
+	enbint(intrupt);
+	}
+
+if(prefix)
+	{
+	fputs(prefix, stdout);
+	fputs(comstring, stdout);	/* the newline is written after the fork, below */
+	}
+
+procp = procstack + nproc;
+procp->pid = (forceshell || metas(comstring) ) ?
+	doshell(comstring,nohalt) : doexec(comstring);
+if(procp->pid == -1)
+	fatal("fork failed");
+procstack[nproc].nohalt = nohalt;
+procstack[nproc].nowait = nowait;
+procstack[nproc].done = NO;
+++proclive;
+++nproc;
+
+if(nowait)
+	{
+	printf(" &%d\n", procp->pid);	/* printed whether or not prefix was echoed */
+	fflush(stdout);
+	return 0;
+	}
+if(prefix)
+	{
+	putchar('\n');
+	fflush(stdout);
+	}
+return waitstack(nproc-1);	/* wait for the entry just pushed */
+}
+
+/*
+   Are there any Shell meta-characters in s?
+   Returns the first character whose funny[] entry has the META bit.
+   The scan has no explicit end-of-string test, so it relies on
+   funny['\0'] carrying META; in that case the result is 0.
+   NOTE(review): c is a plain char used as an index; confirm callers
+   never pass bytes that are negative as char.
+*/
+static int
+metas(char *s)
+{
+	char c;
+
+	for(;;)
+		{
+		c = *s++;
+		if(funny[c] & META)
+			return( c );
+		}
+}
+
+/* Close every directory stream still open on the firstod list (used before exec'ing) */
+static void
+doclose(void)
+{
+	struct dirhd *dp = firstod;
+
+	while(dp)
+		{
+		if(dp->dirfc)
+			closedir(dp->dirfc);
+		dp = dp->nxtdirhd;
+		}
+}
+
+/*  wait till none of the processes in the stack starting at k is live.
+    Returns the OR of the wait statuses collected on the way (0 when
+    every reaped process succeeded) and pops the stack back to k.
+*/
+int
+waitstack(int k)
+{
+int npending, status, totstatus;
+int i;
+
+totstatus = 0;
+npending = 0;
+for(i=k ; i<nproc; ++i)
+	if(! procstack[i].done)
+		++npending;
+enbint(SIG_IGN);
+if(dbgflag > 1)
+	printf("waitstack(%d)\n", k);
+
+while(npending>0 && proclive>0)
+	{
+	/* waitproc can return without reaping anything and then does not
+	   store a status; start from 0 so no garbage is OR-ed in */
+	status = 0;
+	if(waitproc(&status) >= k)
+		--npending;
+	totstatus |= status;
+	}
+
+if(nproc > k)
+	nproc = k;
+enbint(intrupt);
+return totstatus;
+}
+
+/*
+   Reap one child with wait() and mark its procstack entry done.
+   Returns the entry's index and stores the wait status in *statp.
+   Returns MAXPROC, with *statp set to 0, when nothing was reaped:
+   either the call is a recursive one made while an earlier waitproc
+   is being aborted by fatal(), or wait() reported ECHILD.
+   A failing child ends the run through fatal() unless keepgoing or the
+   entry's nohalt is set.
+*/
+static int
+waitproc(int *statp)
+{
+pid_t pid;
+int status;
+int werr;
+int i;
+struct process *procp;
+char junk[50];
+static int inwait = NO;
+
+*statp = 0;	/* callers OR this in even when nothing is reaped */
+if(inwait)	/* avoid infinite recursions on errors */
+	return MAXPROC;
+inwait = YES;
+
+pid = wait(&status);
+werr = errno;	/* the debug fprintf below may disturb errno */
+if(dbgflag > 1)
+	fprintf(stderr, "process %d done, status = %d\n", pid, status);
+if(pid == -1)
+	{
+	if(werr == ECHILD)	/* multiple deaths, no problem */
+		{
+		if(proclive)
+			{
+			for(i=0, procp=procstack; i<nproc; ++i, ++procp)
+				procp->done = YES;
+			proclive = nproc = 0;
+			}
+		/* not an error path: leave the guard clear, or every
+		   later call would return at once without waiting */
+		inwait = NO;
+		return MAXPROC;
+		}
+	fatal("bad wait code");
+	}
+for(i=0, procp=procstack; i<nproc; ++i, ++procp)
+	if(procp->pid == pid)
+		{
+		--proclive;
+		procp->done = YES;
+
+		if(status)
+			{
+			if(procp->nowait)
+				printf("%d: ", pid);
+			if( WEXITSTATUS(status) )
+				printf("*** Error code %d", WEXITSTATUS(status) );
+			else	printf("*** Termination code %d", WTERMSIG(status));
+
+			fputs(procp->nohalt ? "(ignored)\n" : "\n", stdout);
+			fflush(stdout);
+			if(!keepgoing && !procp->nohalt)
+				fatal(CHNULL);
+			}
+		*statp = status;
+		inwait = NO;
+		return i;
+		}
+
+sprintf(junk, "spurious return from process %d", pid);
+fatal(junk);
+/*NOTREACHED*/
+return -1;
+}
+
+/*
+   Fork and run comstring through the shell SHELLCOM.
+   The shell gets "-c" when errors are to be ignored (nohalt) and
+   "-ce" otherwise.  Returns the child's pid in the parent, or the
+   -1 that fork() returned on failure.
+*/
+static int
+doshell(char *comstring, int nohalt)
+{
+pid_t pid;
+
+if((pid = fork()) == 0)
+	{
+	enbint(SIG_DFL);
+	doclose();
+
+	/* the list terminator of a variadic exec must be a char pointer;
+	   a bare NULL may be a plain integer 0 */
+	execl(SHELLCOM, "sh", (nohalt ? "-c" : "-ce"), comstring, (char *) 0);
+	fatal("Couldn't load Shell");
+	}
+
+return pid;
+}
+/*
+   Run a command directly, without a shell.
+   str is split in place at blanks and tabs into an argv array, then a
+   child is forked and the command is execvp'ed there.  Returns the
+   child's pid, or -1 when fork fails or str holds only white space.
+*/
+static int
+doexec(char *str)
+{
+char *t, *tend;
+char **argv;
+char **p;
+int nargs;
+pid_t pid;
+
+while( *str==' ' || *str=='\t' )
+	++str;
+if( *str == '\0' )
+	return(-1);	/* no command */
+
+nargs = 1;	/* one slot for the terminating null pointer */
+for(t = str ; *t ; )
+	{
+	++nargs;
+	while(*t!=' ' && *t!='\t' && *t!='\0')
+		++t;
+	if(*t)	/* replace first white space with \0, skip rest */
+		for( *t++ = '\0' ; *t==' ' || *t=='\t'  ; ++t)
+			;
+	}
+
+/* now allocate args array, copy pointer to start of each string,
+   then terminate array with a null
+*/
+p = argv = (char **) ckalloc(nargs*sizeof(char *));
+tend = t;	/* end of the original string; inner \0s are now separators */
+for(t = str ; t<tend ; )
+	{
+	*p++ = t;
+	while( *t )
+		++t;
+	do	{
+		++t;
+		} while(t<tend && (*t==' ' || *t=='\t') );
+	}
+*p = NULL;
+/*TEMP  for(p=argv; *p; ++p)printf("arg=%s\n", *p);*/
+
+if((pid = fork()) == 0)
+	{
+	enbint(SIG_DFL);
+	doclose();
+	enbint(intrupt);
+	execvp(str, argv);
+	printf("\n");	/* reached only when execvp failed */
+	fatal1("Cannot load %s",str);
+	}
+
+free( (char *) argv);	/* parent side; the child holds its own copy */
+return pid;
+}
+
+/*
+   Bring the modification time of file name up to date.
+   A missing file is created when force is set and reported on stderr
+   otherwise.  An empty file is re-created; any other file has its
+   first byte read and written back in place.  Failures are reported
+   on stderr; nothing is returned.
+*/
+void
+touch(int force, char *name)
+{
+struct stat stbuff;
+char junk[1];
+int fd;
+
+if( stat(name,&stbuff) < 0)
+	{
+	if(force)
+		goto create;
+	fprintf(stderr, "touch: file %s does not exist.\n", name);
+	return;
+	}
+
+if(stbuff.st_size == 0)
+	goto create;
+
+if( (fd = open(name, O_RDWR)) < 0)
+	goto bad;
+
+/* the seek must succeed: after the read the offset is 1, and writing
+   there would overwrite the second byte of the file */
+if( read(fd, junk, 1) < 1 || lseek(fd, 0L, SEEK_SET) < 0 || write(fd, junk, 1) < 1 )
+	{
+	close(fd);
+	goto bad;
+	}
+close(fd);
+return;
+
+bad:
+	fprintf(stderr, "Cannot touch %s\n", name);
+	return;
+
+create:
+	if( (fd = creat(name, 0666)) < 0)
+		goto bad;
+	close(fd);
+}

+ 552 - 0
sys/src/ape/cmd/make/files.c

@@ -0,0 +1,552 @@
+/* POSIX DEPENDENT PROCEDURES */
+#include "defs.h"
+#include <sys/stat.h>
+#include <ar.h>
+
+#define NAMESPERBLOCK	32
+
+/* DEFAULT RULES FOR POSIX */
+
+char *dfltmacro[] =	/* built-in lines: a .SUFFIXES list, then NAME=value macros; ends with a 0 entry */
+	{
+	".SUFFIXES : .o .c .y .l .a .sh .f",
+	"MAKE=make",
+	"AR=ar",
+	"ARFLAGS=rv",
+	"YACC=yacc",
+	"YFLAGS=",
+	"LEX=lex",
+	"LFLAGS=",
+	"LDFLAGS=",
+	"CC=c89",
+	"CFLAGS=-O",
+	"FC=fort77",
+	"FFLAGS=-O 1",
+	0 };
+
+char *dfltpat[] =	/* built-in rules written as % patterns, each followed by tab-led command lines; ends with a 0 entry */
+	{
+	"%.o : %.c",
+	"\t$(CC) $(CFLAGS) -c $<",
+
+	"%.o : %.y",
+	"\t$(YACC) $(YFLAGS) $<",
+	"\t$(CC) $(CFLAGS) -c y.tab.c",
+	"\trm y.tab.c",
+	"\tmv y.tab.o $@",
+
+	"%.o : %.l",
+	"\t$(LEX) $(LFLAGS) $<",
+	"\t$(CC) $(CFLAGS) -c lex.yy.c",
+	"\trm lex.yy.c",
+	"\tmv lex.yy.o $@",
+
+	"%.c : %.y",
+	"\t$(YACC) $(YFLAGS) $<",
+	"\tmv y.tab.c $@",
+
+	"%.c : %.l",
+	"\t$(LEX) $(LFLAGS) $<",
+	"\tmv lex.yy.c $@",
+
+	"% : %.o",
+	"\t$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<",
+
+	"% : %.c",
+	"\t$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<",
+
+	0 };
+
+
+
+char *dfltsuff[] =	/* built-in rules written as suffix rules (.c.o and so on), each followed by tab-led command lines; ends with a 0 entry */
+	{
+	".SUFFIXES : .o .c .y .l .a .sh .f",
+	".c.o :",
+	"\t$(CC) $(CFLAGS) -c $<",
+
+	".f.o :",
+	"\t$(FC) $(FFLAGS) -c $<",
+
+	".y.o :",
+	"\t$(YACC) $(YFLAGS) $<",
+	"\t$(CC) $(CFLAGS) -c y.tab.c",
+	"\trm -f y.tab.c",
+	"\tmv y.tab.o $@",
+
+	".l.o :",
+	"\t$(LEX) $(LFLAGS) $<",
+	"\t$(CC) $(CFLAGS) -c lex.yy.c",
+	"\trm -f lex.yy.c",
+	"\tmv lex.yy.o $@",
+
+	".y.c :",
+	"\t$(YACC) $(YFLAGS) $<",
+	"\tmv y.tab.c $@",
+
+	".l.c :",
+	"\t$(LEX) $(LFLAGS) $<",
+	"\tmv lex.yy.c $@",
+
+	".c.a:",
+	"\t$(CC) -c $(CFLAGS) $<",
+	"\t$(AR) $(ARFLAGS) $@ $*.o",
+	"\trm -f $*.o",
+
+	".f.a:",
+	"\t$(FC) -c $(FFLAGS) $<",
+	"\t$(AR) $(ARFLAGS) $@ $*.o",
+	"\trm -f $*.o",
+
+	".c:",
+	"\t$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<",
+
+	".f:",
+	"\t$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $<",
+
+	".sh:",
+	"\tcp $< $@",
+	"\tchmod a+x $@",
+
+	0 };
+
+
+static struct dirhd	*opdir(char *, int);	/* find or create the record for a directory and open it */
+static void		cldir(struct dirhd *, int);	/* close or rewind a directory after use */
+static int		amatch(char *, char *);	/* glob match of a name against a pattern */
+static int		umatch(char *, char *);	/* helper of amatch for '*' */
+static void		clarch(void);	/* close the archive stream arfd */
+static int		openarch(char *);	/* open an archive and check its magic string */
+static int		getarch(void);	/* read the next archive member header */
+
+/*
+   Modification time of filename, or 0 if it cannot be stat'ed.
+   A name containing '(' or ')' is treated as an archive member
+   reference and is looked up with lookarch instead.
+*/
+time_t
+exists(char *filename)
+{
+	struct stat st;
+	char *cp = filename;
+
+	while(*cp != '\0' && *cp != '(' && *cp != ')')
+		++cp;
+
+	if(*cp != '\0')
+		return lookarch(filename);
+
+	return stat(filename, &st) < 0 ? 0 : st.st_mtime;
+}
+
+
+/* Present time, as reported by time() */
+time_t
+prestime(void)
+{
+	time_t now;
+
+	(void) time(&now);
+	return now;
+}
+
+static char nmtemp[MAXNAMLEN+1];	/* guarantees a null after the name */
+static char *tempend = nmtemp + MAXNAMLEN;
+
+
+/*
+   Search the directory part of pat for entries matching its file part.
+   Every match is entered in the name table if it is not there yet.
+   When mkchain is set, a depblock is built for each match and linked in
+   front of nextdbl; the head of that chain is returned (0 when nothing
+   matched).  When mkchain is NO, a pattern that was searched before is
+   skipped and the result is always 0.
+   pat is cut at its last '/' while the directory is read and restored
+   before returning.
+*/
+depblkp
+srchdir(char *pat, int mkchain, depblkp nextdbl)
+{
+DIR *dirf;
+struct dirhd *dirptr;
+char *dirname, *dirpref, *endir, *filepat, *p;
+char *prefbuf, *fullname;
+nameblkp q;
+depblkp thisdbl;
+struct pattern *patp;
+
+struct dirent *dptr;
+
+thisdbl = 0;
+
+if(mkchain == NO)
+	for(patp=firstpat ; patp ; patp = patp->nxtpattern)
+		if(equal(pat, patp->patval)) return 0;
+
+patp = ALLOC(pattern);
+patp->nxtpattern = firstpat;
+firstpat = patp;
+patp->patval = copys(pat);
+
+endir = 0;
+
+for(p=pat; *p!='\0'; ++p)
+	if(*p=='/') endir = p;
+
+prefbuf = 0;
+if(endir==0)
+	{
+	dirname = ".";
+	dirpref = "";
+	filepat = pat;
+	}
+else	{
+	dirname = pat;
+	*endir = '\0';
+	/* room for the directory, the '/' and the null: no fixed limit */
+	prefbuf = (char *) ckalloc(strlen(dirname)+2);
+	dirpref = concat(dirname, "/", prefbuf);
+	filepat = endir+1;
+	}
+
+dirptr = opdir(dirname,YES);
+dirf = dirptr->dirfc;
+
+/* nmtemp never holds more than MAXNAMLEN characters */
+fullname = (char *) ckalloc(strlen(dirpref)+MAXNAMLEN+1);
+
+for( dptr = readdir(dirf) ; dptr ; dptr = readdir(dirf) )
+	{
+	char *p1, *p2;
+	p1 = dptr->d_name;
+	p2 = nmtemp;
+	while( (p2<tempend) && (*p2++ = *p1++)!='\0')
+		;
+	if( amatch(nmtemp,filepat) )
+		{
+		concat(dirpref,nmtemp,fullname);
+		if( (q=srchname(fullname)) ==0)
+			q = makename(copys(fullname));
+		if(mkchain)
+			{
+			thisdbl = ALLOC(depblock);
+			thisdbl->nxtdepblock = nextdbl;
+			thisdbl->depname = q;
+			nextdbl = thisdbl;
+			}
+		}
+	}
+
+free(fullname);
+if(prefbuf)
+	free(prefbuf);
+
+if(endir)
+	*endir = '/';
+
+cldir(dirptr, YES);
+
+return thisdbl;
+}
+
+/*
+   Return the dirhd record for dirname, creating and linking a new one
+   at the head of the firstod list when there is none, and make sure its
+   directory stream is open.  If the directory cannot be opened the
+   record is still returned with a null dirfc, unless stopifbad is set,
+   in which case the failure is fatal.
+*/
+static struct dirhd *
+opdir(char *dirname, int stopifbad)
+{
+	struct dirhd *dh;
+
+	dh = firstod;
+	while(dh != NULL && !equal(dirname, dh->dirn))
+		dh = dh->nxtdirhd;
+
+	if(dh == NULL)
+		{
+		++nopdir;
+		dh = ALLOC(dirhd);
+		dh->nxtdirhd = firstod;
+		firstod = dh;
+		dh->dirn = copys(dirname);
+		}
+
+	if(dh->dirfc == NULL)
+		{
+		dh->dirfc = opendir(dirname);
+		if(dh->dirfc == NULL && stopifbad)
+			{
+			fprintf(stderr, "Directory %s: ", dirname);
+			fatal("Cannot open");
+			}
+		}
+
+	return dh;
+}
+
+
+/*
+   Finish with a directory record after a scan.
+   Once nopdir has reached MAXDIR the stream is closed; otherwise it
+   is rewound if it was read (used).  A record whose directory could
+   not be opened has no stream and is left alone.
+*/
+static void
+cldir(struct dirhd *dp, int used)
+{
+if(dp->dirfc == NULL)	/* opdir(..., NO) may hand back an unopened record */
+	return;
+
+if(nopdir >= MAXDIR)
+	{
+	closedir(dp->dirfc);
+	dp->dirfc = NULL;
+	}
+else if(used)
+	rewinddir(dp->dirfc); /* start over at the beginning  */
+}
+
+/* stolen from glob through find
+
+   amatch: does the whole string s match the pattern p?
+   Returns nonzero for a match and 0 otherwise.  The pattern may hold
+   '?' (any one character), '*' (handled by umatch) and '[...]'
+   classes with '-' ranges; any other character must match itself.
+*/
+
+static int
+amatch(char *s, char *p)
+{
+	int cc, scc, k;
+	int c, lc;
+
+	scc = *s;	/* the string character under test */
+	lc = 077777;	/* larger than any char, so a '-' right after '[' cannot start a range */
+	switch (c = *p) {
+
+	case '[':
+		k = 0;	/* becomes nonzero once scc is found in the class */
+		while (cc = *++p) {
+			switch (cc) {
+
+			case ']':
+				if (k)
+					return amatch(++s, ++p);
+				else
+					return 0;
+
+			case '-':	/* range from the previous class character lc to the next one */
+				k |= (lc <= scc)  & (scc <= (cc=p[1]) ) ;
+			}
+			if (scc==(lc=cc)) k++;
+		}
+		return 0;	/* pattern ended before the closing ']' */
+
+	case '?':
+	caseq:	/* one character matched: go on with the rest of both */
+		if(scc) return amatch(++s, ++p);
+		return 0;
+	case '*':
+		return umatch(s, ++p);
+	case 0:
+		return !scc;	/* pattern used up: match only if the string is too */
+	}
+	if (c==scc) goto caseq;
+	return 0;
+}
+
+/*
+   Match the rest of a pattern after a '*'.
+   p points past the star.  Returns 1 if p matches some tail of s,
+   the empty tail included, and 0 otherwise.
+*/
+static int
+umatch(char *s, char *p)
+{
+	if(*p==0) return 1;
+	for(;;)
+		{
+		if (amatch(s,p)) return 1;
+		/* the empty tail is tried too, so that a pattern ending
+		   in several stars ("a**") still matches */
+		if(*s == 0) return 0;
+		++s;
+		}
+}
+
+#ifdef METERFILE
+#include <pwd.h>
+int meteron	= 0;	/* default: metering off */
+
+/*
+   Append a "User name, date" line to file.
+   Does nothing when file is null, when meteron is 0, or when the file
+   cannot be opened for appending.
+*/
+extern void meter(char *file)
+{
+time_t tvec;
+char *p;
+FILE * mout;
+struct passwd *pwd;
+
+if(file==0 || meteron==0) return;
+
+pwd = getpwuid(getuid());
+
+time(&tvec);
+
+if( mout = fopen(file,"a") )
+	{
+	p = ctime(&tvec);
+	p[16] = '\0';	/* cut the ctime string after the minutes */
+	/* getpwuid returns null for a uid without a password entry */
+	fprintf(mout, "User %s, %s\n", pwd ? pwd->pw_name : "?", p+4);
+	fclose(mout);
+	}
+}
+#endif
+
+
+/* look inside archives for notation a(b)
+	a(b)	is file member   b   in archive a
+*/
+
+static long arflen;
+static long arfdate;
+static char arfname[16];
+FILE *arfd;
+long int arpos, arlen;
+
+/*
+   Return the date recorded for member b of archive a, given "a(b)".
+   Returns 0 when the name has no '(', when the archive cannot be
+   opened, when text follows the ')' or when the member is not found.
+   Only the first 14 characters of the member name are compared,
+   blank padded.
+*/
+time_t
+lookarch(char *filename)
+{
+char *p, *q, *send, s[15], pad;
+int nc;
+
+/* callers also send names that hold only a ')', so the scan for
+   '(' has to stop at the end of the string */
+for(p = filename; *p!= '(' ; ++p)
+	if(*p == '\0')
+		return 0L;
+
+*p = '\0';
+if( ! openarch(filename) )
+	{
+	*p = '(';
+	return 0L;
+	}
+*p++ = '(';
+nc = 14;
+pad = ' ';
+
+send = s + nc;
+for( q = s ; q<send && *p!='\0' && *p!=')' ; *q++ = *p++ )
+	;
+if(p[0]==')' && p[1]!='\0')	/* forbid stuff after the paren */
+	{
+	clarch();
+	return 0L;
+	}
+while(q < send)
+	*q++ = pad;
+while(getarch())
+	{
+	if( !strncmp(arfname, s, nc))
+		{
+		clarch();
+/*TEMP fprintf(stderr, "found archive member %14s, time=%d\n", s, arfdate); */
+		return arfdate;
+		}
+	}
+
+clarch();
+return  0L;
+}
+
+/* Close the archive stream arfd */
+static void
+clarch(void)
+{
+	(void) fclose(arfd);
+}
+
+/*
+   Open archive f for scanning with getarch.
+   Sets arfd, arlen (file size) and arpos (just past the magic string)
+   and marks f as an archive in the name table.  Returns YES on
+   success and NO when the file cannot be stat'ed or opened; a file
+   that does not start with the archive magic string is fatal.
+*/
+static int
+openarch(char *f)
+{
+char magic[SARMAG];
+struct stat buf;
+nameblkp p;
+
+if(stat(f, &buf) < 0)
+	return NO;
+arlen = buf.st_size;
+
+arfd = fopen(f, "r");
+if(arfd == NULL)
+	return NO;
+	/* fatal1("cannot open %s", f); */
+
+/* a short read leaves magic unset, so it must not reach strncmp */
+if( fread(magic, SARMAG, 1, arfd) != 1 || strncmp(magic, ARMAG, SARMAG) )
+	fatal1("%s is not an archive", f);
+arpos = SARMAG;
+
+if( !(p = srchname(f)) )
+	p = makename( copys(f) );
+p->isarch = YES;
+arflen = 0;
+return YES;
+}
+
+
+/*
+   Step to the next member header of the archive open on arfd.
+   Fills arfname, arfdate and arflen from the header and returns 1.
+   Returns 0 at the end of the archive or when the header cannot be
+   read.
+*/
+static int
+getarch(void)
+{
+struct ar_hdr arhead;
+
+arpos += (arflen + 1) & ~1L;	/* round archived file length up to even */
+if(arpos >= arlen)
+	return 0;
+if(fseek(arfd, arpos, SEEK_SET) != 0)
+	return 0;
+
+/* a truncated archive must not be parsed from an unset header */
+if(fread( (char *) &arhead, sizeof(arhead), 1, arfd) != 1)
+	return 0;
+arpos += sizeof(arhead);
+arflen = atol(arhead.ar_size);
+arfdate = atol(arhead.ar_date);
+strncpy(arfname, arhead.ar_name, sizeof(arhead.ar_name));
+return 1;
+}
+
+/* find the directory containing name.
+   read it into the hash table if it hasn't been used before or if
+   it might have changed since last reference.
+   A name with parentheses is taken as an archive member: its date is
+   looked up with lookarch and stored, and no directory is read.
+   name is cut at its last '/' while this runs and restored at the end.
+*/
+
+void
+dirsrch(char *name)
+{
+DIR *dirf;
+struct dirhd *dirp;
+time_t dirt, objt;
+int dirused, hasparen;
+char *dirname, *lastslash;
+char *fullname, *filepart, *fileend, *s;
+struct dirent *dptr;
+
+lastslash = NULL;
+hasparen = NO;
+
+for(s=name; *s; ++s)
+	if(*s == '/')
+		lastslash = s;
+	else if(*s=='(' || *s==')')
+		hasparen = YES;
+
+if(hasparen)
+	{
+	if(objt = lookarch(name))	/* 0 means no date was found */
+		makename(name)->modtime = objt;
+	return;
+	}
+
+if(lastslash)
+	{
+	dirname = name;
+	*lastslash = '\0';	/* undone at ret: */
+	}
+else
+	dirname = ".";
+
+dirused = NO;
+dirp = opdir(dirname, NO);	/* NO: an unopenable directory is not fatal here */
+dirf = dirp->dirfc;
+if(dirp->dirok || !dirf)
+	goto ret;
+dirt = exists(dirname);
+if(dirp->dirtime == dirt)	/* same time as at the last read */
+	goto ret;
+
+dirp->dirok = YES;
+dirp->dirtime = dirt;
+dirused = YES;
+
+/* allocate buffer to hold full file name */
+if(lastslash)
+	{
+	fullname = (char *) ckalloc(strlen(dirname)+MAXNAMLEN+2);
+	concat(dirname, "/", fullname);
+	filepart = fullname + strlen(fullname);
+	}
+else
+	filepart = fullname = (char *) ckalloc(MAXNAMLEN+1);
+
+
+fileend = filepart + MAXNAMLEN;
+*fileend = '\0';	/* terminator for names truncated at MAXNAMLEN */
+for(dptr = readdir(dirf) ; dptr ; dptr = readdir(dirf) )
+	{
+	char *p1, *p2;
+	p1 = dptr->d_name;
+	p2 = filepart;
+	while( (p2<fileend) && (*p2++ = *p1++)!='\0')
+		;
+	if( ! srchname(fullname) )	/* enter only names not yet in the table */
+		(void) makename(copys(fullname));
+	}
+
+free(fullname);
+
+ret:
+	cldir(dirp, dirused);
+	if(lastslash)
+		*lastslash = '/';
+}
+
+
+
+/* Clear dirok on every record of the firstod list, so dirsrch checks each directory again */
+void
+baddirs(void)
+{
+	struct dirhd *dp = firstod;
+
+	while(dp)
+		{
+		dp->dirok = NO;
+		dp = dp->nxtdirhd;
+		}
+}

Some files were not shown because too many files changed in this diff