Browse Source

Removing ancient scsi experiment and obsolete spelling/dictionary tools

Signed-off-by: Álvaro Jurado <elbingmiss@gmail.com>
Álvaro Jurado 1 year ago
parent
commit
d8c0151af9
54 changed files with 0 additions and 47544 deletions
  1. 0 96
      sys/man/1/spell
  2. 0 163
      sys/man/7/dict
  3. 0 413
      sys/man/8/scuzz
  4. 0 148
      sys/src/cmd/dict/ahd.c
  5. 0 29
      sys/src/cmd/dict/canonind.awk
  6. 0 56
      sys/src/cmd/dict/comfix.awk
  7. 0 684
      sys/src/cmd/dict/dict.c
  8. 0 172
      sys/src/cmd/dict/dict.h
  9. 0 15
      sys/src/cmd/dict/egfix
  10. 0 8
      sys/src/cmd/dict/egfix2
  11. 0 123
      sys/src/cmd/dict/fromemf
  12. 0 1117
      sys/src/cmd/dict/gb2312.c
  13. 0 23
      sys/src/cmd/dict/gefix
  14. 0 8
      sys/src/cmd/dict/getneeds
  15. 0 1068
      sys/src/cmd/dict/jis208.c
  16. 0 123
      sys/src/cmd/dict/kuten.h
  17. 0 23
      sys/src/cmd/dict/mkfile
  18. 0 115
      sys/src/cmd/dict/mkindex.c
  19. 0 23
      sys/src/cmd/dict/mkroget
  20. 0 333
      sys/src/cmd/dict/movie.c
  21. 0 1434
      sys/src/cmd/dict/oed.c
  22. 0 235
      sys/src/cmd/dict/pcollins.c
  23. 0 257
      sys/src/cmd/dict/pcollinsg.c
  24. 0 1170
      sys/src/cmd/dict/pgw.c
  25. 0 6
      sys/src/cmd/dict/rev.awk
  26. 0 321
      sys/src/cmd/dict/robert.c
  27. 0 156
      sys/src/cmd/dict/roget.c
  28. 0 55
      sys/src/cmd/dict/simple.c
  29. 0 212
      sys/src/cmd/dict/slang.c
  30. 0 13
      sys/src/cmd/dict/t.awk
  31. 0 95
      sys/src/cmd/dict/thesaurus.c
  32. 0 559
      sys/src/cmd/dict/utils.c
  33. 0 193
      sys/src/cmd/dict/world.c
  34. 0 168
      sys/src/cmd/scuzz/cdaudio.c
  35. 0 228
      sys/src/cmd/scuzz/cdr.c
  36. 0 71
      sys/src/cmd/scuzz/changer.c
  37. 0 15
      sys/src/cmd/scuzz/mkfile
  38. 0 54
      sys/src/cmd/scuzz/mo.words
  39. 0 765
      sys/src/cmd/scuzz/scsireq.c
  40. 0 160
      sys/src/cmd/scuzz/scsireq.h
  41. 0 1967
      sys/src/cmd/scuzz/scuzz.c
  42. 0 53
      sys/src/cmd/scuzz/sense.c
  43. 0 416
      sys/src/cmd/spell/american
  44. BIN
      sys/src/cmd/spell/amspell
  45. 0 411
      sys/src/cmd/spell/british
  46. BIN
      sys/src/cmd/spell/brspell
  47. 0 37
      sys/src/cmd/spell/code.h
  48. 0 29383
      sys/src/cmd/spell/list
  49. 0 840
      sys/src/cmd/spell/local
  50. 0 46
      sys/src/cmd/spell/mkfile
  51. 0 345
      sys/src/cmd/spell/pcode.c
  52. 0 21
      sys/src/cmd/spell/spell.rc
  53. 0 1386
      sys/src/cmd/spell/sprog.c
  54. 0 1732
      sys/src/cmd/spell/stop

+ 0 - 96
sys/man/1/spell

@@ -1,96 +0,0 @@
-.TH SPELL 1
-.SH NAME
-spell, sprog \- find spelling errors
-.SH SYNOPSIS
-.B spell
-[
-.I options
-]
-\&...
-[
-.I file
-]
-\&...
-.PP
-.B aux/sprog
-[
-.I options
-]
-[
-.B -f
-.I file
-]
-.SH DESCRIPTION
-.I Spell
-looks up words from the named
-.I files
-(standard input default)
-in a spelling list and places
-possible misspellings\(emwords 
-not sanctioned there\(emon the standard output.
-.PP
-.I Spell
-ignores constructs of
-.IR troff (1)
-and its standard preprocessors.
-It understands these options:
-.TP
-.B -b
-Check British spelling.
-.TP
-.B -v
-Print all words not literally in the spelling list, with
-derivations.
-.TP
-.B -x
-Print on standard error, marked with
-.LR = ,
-every stem as it is looked up in the spelling list,
-along with its affix classes.
-.PP
-As a matter of policy, 
-.I spell
-does not admit multiple spellings of the same word.
-Variants that follow general rules are preferred
-over those that don't, even when the unruly spelling is
-more common.
-Thus, in American usage, `modelled', `sizeable', and `judgment' are
-rejected in favor of `modeled', `sizable', and `judgement'.
-Agglutinated variants are shunned: `crewmember' and `backyard'
-cede to `crew member' and  `back yard' (noun) or `back-yard' 
-(adjective).
-.SH FILES
-.TF \fL/sys/lib/brspell
-.TP
-.B /sys/lib/amspell
-American spelling list
-.TP
-.B /sys/lib/brspell
-British spelling list
-.TP
-.B /bin/aux/sprog
-The actual spelling checker.
-It expects one word per line on standard input,
-and takes the same arguments as
-.IR spell .
-.SH SOURCE
-.TF /sys/src/cmd/spell
-.TP
-.B /rc/bin/spell
-the script
-.TP
-.B /sys/src/cmd/spell
-source for
-.I sprog
-.SH SEE ALSO
-.IR deroff (1)
-.SH BUGS
-The heuristics of
-.IR deroff (1)
-used to excise formatting information are imperfect.
-.PP
-The spelling list's coverage is uneven;
-in particular biology, medicine, and chemistry, and
-perforce proper names,
-not to mention languages other than English,
-are covered very lightly.

+ 0 - 163
sys/man/7/dict

@@ -1,163 +0,0 @@
-.TH DICT 7
-.SH NAME
-dict \- dictionary browser
-.SH SYNOPSIS
-.B dict
-[
-.B -k
-]
-[
-.B -d
-.I dictname
-]
-[
-.B -c
-.I command
-]
-[
-.I pattern
-]
-.SH DESCRIPTION
-.I Dict
-is a dictionary browser.
-If a
-.I pattern
-is given on the command line,
-.I dict
-prints all matching entries;
-otherwise it repeatedly accepts and executes commands.
-The options are
-.TF -d\ \fIdictname\fP
-.TP
-.BI -d " dictname"
-Use the given dictionary.
-The default is
-.BR oed ,
-the second edition of the Oxford English Dictionary.
-A list of available dictionaries is printed by option
-.BR -d? .
-.TP
-.BI -c " command"
-Execute one command and quit.
-The command syntax is described below.
-.TP
-.B -k
-Print a pronunciation key.
-.PD
-.PP
-Patterns are regular expressions (see
-.IR regexp (6)),
-with an implicit leading
-.L ^
-and trailing
-.LR $ .
-Patterns are matched against an index of headwords and variants,
-to form a `match set'.
-By default, both patterns and the index are folded:
-upper case characters are mapped into their lower case equivalents,
-and accented characters are mapped into their base
-equivalents.
-In interactive mode, there is always a `current match set'
-and a `current entry' within the match set.
-Commands can change either or both, as well as print the entries
-or information about them.
-.PP
-Commands have an address followed by a command letter.
-Addresses have the form:
-.TF /\fIre\fP/.\fIn\fP
-.TP
-.BI / re /
-Set the match set to all entries matching the regular expression
-.IR re ,
-sorted in dictionary order.
-Set the current entry to the first of the match set.
-.TP
-.BI ! re !
-Like
-.BI / re /
-but use exact matching, i.e., without case and accent folding.
-.TP
-.I n
-An integer
-.I n
-means change the current entry to the
-.IR n th
-of the current match set.
-.TP
-.BI # n
-The integer
-.I n
-is an absolute byte offset into the raw dictionary.
-(See the
-.B A
-command, below.)
-.TP
-.IB addr +
-After setting the match set and current entry according to
-.IR addr ,
-change the match set and current entry to be the next entry
-in the dictionary (not necessarily in the match set) after
-the current entry.
-.TP
-.IB addr -
-Like
-.IB addr +
-but go to previous dictionary entry.
-.PD
-.PP
-The command letters come in pairs: a lower case and the
-corresponding upper case letter.
-The lower case version prints something about the current
-entry only, and advances the current entry to the next
-in the match set (wrapping around to the beginning after
-the last).
-The upper case version prints something about all of the
-match set and resets the current entry to the beginning of
-the set.
-.TF \fLa,A\fP
-.TP
-.BR p , P
-Print the whole entry.
-.TP
-.BR h , H
-Print only the headword(s) of the entry.
-.TP
-.BR a , A
-Print the dictionary byte offset of the entry.
-.TP
-.BR r , R
-Print the whole entry in raw format (without translating
-special characters, etc.).
-.PD
-.PP
-If no command letter is given for the first command,
-.B H
-is assumed.
-After an
-.BR H ,
-the default command is
-.BR p .
-Otherwise, the default command is the previous command.
-.SH FILES
-.B /lib/dict/oed2
-.br
-.B /lib/dict/oed2index
-.br
-Other files in
-.BR /lib .
-.SH "SEE ALSO"
-.IR regexp (6)
-.SH SOURCE
-.B /sys/src/cmd/dict
-.SH BUGS
-A font with wide coverage of the Unicode Standard
-should be used for best results.
-(Try
-.BR /lib/font/bit/pelm/unicode.9.font .)
-.br
-If the
-.I pattern
-doesn't begin with
-a few literal characters, matching takes a long time.
-.br
-The dictionaries are not distributed outside Bell Labs.

+ 0 - 413
sys/man/8/scuzz

@@ -1,413 +0,0 @@
-.TH SCUZZ 8
-.SH NAME
-scuzz \- SCSI target control
-.SH SYNOPSIS
-.B scuzz
-[
-.B -6eq
-] [
-.B -m
-.I max-xfer
-] [
-[
-.B -r
-]
-.I sddev
-]
-.SH DESCRIPTION
-.I Scuzz
-is an interactive program for exercising
-raw SCSI devices.
-Its intended purpose is to investigate and manipulate
-odd devices without the effort of writing a special driver,
-such as shuffling the media around on an optical jukebox.
-It reads commands from standard input and applies them to a SCSI target
-(other devices accessed through the
-.IR sd (3)
-interface,
-such as ATA(PI) devices,
-may also work).
-If
-.I sddev
-is given on the command line, an
-.B open
-(see below)
-is immediately applied to the target.
-On successful completion of a command,
-.BI ok " n
-is printed, where
-.I n
-is the number of bytes transferred to/from the target;
-the
-.B -q
-command line option suppresses the
-.B ok
-message.
-.LP
-The
-.B -6
-forces the use of 6-byte SCSI commands rather than 10-byte ones.
-Some older devices require this, though
-.I scuzz
-attempts to adapt automatically.
-The
-.B -e
-makes
-.I scuzz
-more willing to retry I/O errors but less tolerant of other errors
-and implies
-.BR -6 .
-This option is often needed to read Exabyte 8mm tapes.
-The
-.B -m
-option sets the maximum I/O transfer size to
-.IR max-xfer .
-Exabyte drives often require this to be 1024 or the exact tape block size
-and some 4mm drives require this to be the exact tape block size or larger.
-.SS Commands
-.TF "inquiry"
-.PD
-.TP
-.BI help " command
-.B Help
-is rudimentary and prints a one line synopsis for the named
-.IR command ,
-or for all commands if no argument is given.
-.TP
-.B probe
-.B Probe
-attempts an
-.B inquiry
-command on all SCSI units,
-and prints the result preceded by the name of those
-targets which respond.
-.LP
-The
-.B help
-and
-.B probe
-commands may be given at any time.
-.TF "inquiry"
-.PD
-.TP
-.BI open\ [ -r ] sddev
-.B Open
-must be given before any of the remaining commands will be accepted.
-Internally,
-unless the
-.B -r
-option is given,
-.B open
-issues
-.B ready
-then
-.BR inquiry ,
-followed by a device class-specific command to determine the
-logical block size of the target.
-.I Sddev
-is an
-.IR sd (3)
-device directory like
-.IR /dev/sdC0 .
-.TP
-.B close
-.B Close
-need only be given if another target is to be opened in the current
-session.
-.LP
-The remaining commands are in rough groups,
-intended for specific classes of device.
-With the exception of the
-.BR read ,
-.BR write ,
-and
-.B space
-commands,
-all arguments are in the style of ANSI-C integer constants.
-.TF "inquiry"
-.PD
-.TP
-.B ready
-Test Unit Ready
-checks if the unit is powered up and ready to do
-.B read
-and
-.B write
-commands.
-.TP
-.B rezero
-Rezero
-Unit requests that a disk be brought to a known state,
-usually by seeking to track zero.
-.TP
-.B rewind
-.B Rewind
-positions a tape at the beginning of current partition
-(there is usually only one partition, the beginning of tape).
-.TP
-.B reqsense
-Request Sense retrieves Sense Data concerning an error or
-other condition and is usually issued following the completion of a command
-that had check-condition status.
-.I Scuzz
-automatically issues a
-.B reqsense
-in response to a check-condition status and prints the result.
-.TP
-.B format
-Format
-Unit performs a ``low level'' format of a disk.
-.TP
-.B rblimits
-Read Block Limits
-reports the possible block lengths for the logical unit. Tapes only.
-.TP
-.BI read " file nbytes
-.B Read
-transfers data from the target to the host.
-A missing
-.I nbytes
-causes the entire device to be read.
-.TP
-.BI write " file nbytes
-.B Write
-transfers data from the host to the target.
-A missing
-.I nbytes
-causes the entire input file to be transferred.
-.IP
-The first argument to the
-.BR read
-and
-.BR write
-commands specifies a source
-.RB ( write )
-or destination
-.RB ( read )
-for the I/O.
-The argument is either a plain file name or
-.B |
-followed by a command to be executed by
-.IR rc (1).
-The argument may be quoted in the style of
-.IR rc (1).
-.TP
-.BI seek " offset whence
-.B Seek
-requests the target to seek to a position on a disk,
-arguments being in the style of
-.IR seek (2);
-.I whence
-is 0 by default.
-.IP
-.I Scuzz
-maintains an internal notion of where the current target
-is positioned.
-The
-.BR seek ,
-.BR read ,
-.BR write ,
-.BR rewind ,
-.BR rezero ,
-and
-.B wtrack
-commands all manipulate the internal offset.
-.TP
-.BI filemark " howmany
-Write Filemarks
-writes one (default) or more filemarks on a tape.
-.TP
-.BI space\ [ -b ]\ [ -f ]\ [[ "--\fP]\fIhowmany\fP]"
-.B Space
-positions a tape forwards or backwards.
-The arguments
-specify logical block
-.RB ( -b )
-or
-filemark
-.RB ( -f )
-spacing;
-default is
-.BR -b .
-If
-.I howmany
-is negative
-it specifies spacing backwards,
-and should be preceded by
-.B --
-to turn off any further
-option processing.
-Default is 1.
-.TP
-.B inquiry
-.B Inquiry
-is issued to determine the device type of a particular target,
-and to determine some basic information about the implemented options and
-the product name.
-.TP
-.BI modeselect bytes...
-.TP
-.BI modeselect6 bytes...
-Mode
-Select
-is issued to set variable parameters in the target.
-.I Bytes
-given as arguments comprise all the data for the target;
-see an appropriate manual for the format.
-The default is the 10-byte form of the command;
-modeselect6 is the 6-byte version.
-.TP
-.BI modesense\ [ page [ nbytes ]]
-.TP
-.BI modesense6\ [ page [ nbytes ]]
-Mode
-Sense
-reports variable and fixed parameters from the target.
-If no
-.I page
-is given,
-all pages are returned.
-.I Nbytes
-specifies how many bytes should be returned.
-The default is the 10-byte form of the command;
-modesense6 is the 6-byte version.
-.TP
-.BI start\ [ code ]
-.TP
-.BI stop\ [ code ]
-.TP
-.BI eject\ [ code ]
-.TP
-.BI ingest\ [ code ]
-.BR Start ,
-.BR stop ,
-.BR eject ,
-and
-.B ingest
-are synonyms for Start/Stop Unit with different default values of
-.IR code .
-Start/Stop Unit is typically used to spin up and spin down a rotating
-disk drive.
-.I Code
-is 0 to stop,
-1 to start and
-3 to eject (if the device supports ejection of the medium).
-.TP
-.B capacity
-Read Capacity reports the number of blocks and the block
-size of a disk.
-.LP
-The following commands are specific to CD and CD-R/RW devices.
-A brief description of each is given; see the SCSI-3
-Multimedia Commands (MMC) Specification for details of arguments
-and interpretation of the results.
-.TF "inquiry"
-.PD
-.TP
-.BI blank\ [ track/LBA [ type ]]
-Erase a CD-RW disk.
-Type identifies the method and coverage of the blanking.
-.TP
-.BI rtoc\ [ track/session-number [ ses ]]
-The Read TOC/PMA command transfers data from one of the tables of contents
-(TOC or PMA) on the CD medium.
-.TP
-.B rdiscinfo
-(Note the spelling.)
-Provides information about disks, including incomplete CD-R/RW.
-.TP
-.BI rtrackinfo\ [ track ]
-Provides information about a track, regardless of its status.
-.TP
-.B cdpause
-.TP
-.B cdresume
-Pause/resume playback.
-.TP
-.B cdstop
-Stop playback.
-.TP
-.BI cdplay\ [ track-number ]\ or\ [ -r [ "LBA\fP[\fIlength\fP]]]"
-Play audio.
-With no arguments, starts at the beginning of the medium.
-If a track number is given, the table of contents is read
-to find the playback start point.
-If the
-.B -r
-option is given, block addressing is used to find the
-playback start point.
-.TP
-.BI cdload\ [ slot ]
-.TP
-.BI cdunload\ [ slot ]
-Load/unload a disk from a changer.
-.TP
-.B cdstatus
-Read the mechanism status.
-.LP
-The following commands are specific to Media Changer devices.
-A brief description of each is given; see the SCSI-3
-Medium Changer Commands (SMC) Specification for details of arguments.
-.TF "inquiry"
-.PD
-.TP
-.B einit
-Initialize element status.
-.TP
-.BI "estatus " "type " [ length ]
-Report the status of the internal elements.
-Type 0 reports all element types.
-.TP
-.BI "mmove " transport\ source\ destination [ invert ]
-Move medium.
-.SH FILES
-.TF /dev/sdXX/raw
-.TP
-.B /dev/\fIsdXX\fP/raw
-raw SCSI interface for command, I/O, and status.
-.SH SOURCE
-.B /sys/src/cmd/scuzz
-.SH "SEE ALSO"
-.IR sd (3)
-.br
-.IR "Small Computer System Interface - 2 (X3T9.2/86-109)" ,
-Global Engineering Documents
-.br
-.IR "SCSI Bench Reference" ,
-ENDL Publications
-.br
-.IR "SCSI-3 Multimedia Commands (MMC) Specification" ,
-www.t10.org
-.br
-.IR "SCSI-3 Medium Changer Commands (SMC) Specification" ,
-www.t10.org
-.SH BUGS
-Only a limited subset of SCSI commands has been implemented (as needed).
-.LP
-Only one target can be open at a time.
-.LP
-LUNs other than 0 are not supported.
-.LP
-No way to force 10-byte commands, though they are the default.
-.LP
-Should be recoded to use
-.IR scsi (2)
-in order to get more complete sense code descriptions.
-.LP
-.I Scuzz
-betrays its origins by spelling
-.B rdiscinfo
-with a
-.B c
-even though the devices it manipulates are spelled with a
-.BR k .
-.LP
-The
-.I max-xfer
-value is currently limited to 245760
-to limit kernel memory consumption.
-.LP
-It may be necessary to set
-.I max-xfer
-to exactly the block size used to write a tape
-in order to read it on some drives.

+ 0 - 148
sys/src/cmd/dict/ahd.c

@@ -1,148 +0,0 @@
-/*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
-
-#include <u.h>
-#include <libc.h>
-#include <bio.h>
-#include "dict.h"
-
-/*
- * American Heritage Dictionary (encrypted)
- */
-
-/*
- * Byte-to-rune translation applied to each decoded dictionary byte.
- * Only the bytes listed here are remapped; ahdprintentry fills every
- * slot that is still zero with the identity mapping on first use.
- */
-static Rune intab[256] = {
-	[0x82] = L'é',
-	[0x85] = L'à',
-	[0x89] = L'ë',
-	[0x8a] = L'è',
-	[0xa4] = L'ñ',
-	[0xf8] = L'°',
-	[0xf9] = L'·',
-};
-
-/* collects the text between %@ and @% while ahdprintentry scans a tag */
-static char	tag[64];
-
-/* scanner states used by ahdprintentry */
-enum{
-	Run, Openper, Openat, Closeat
-};
-
-/*
- * Print entry e.  Each byte is decoded by XORing it with (offset>>1),
- * where offset starts at e.doff and advances by one per byte, and is
- * then mapped through intab.
- * Markup of the form %@tag@% is recognized by a four-state scanner:
- * for cmd 'r' the tag is echoed, for cmd 'h' printing stops at the
- * "EH" tag, and for any other cmd tags are dropped.
- * breaklen is set to 80 while printing and restored before returning.
- */
-void
-ahdprintentry(Entry e, int cmd)
-{
-	static int inited;
-	int32_t addr;
-	char *p, *t = tag;
-	int obreaklen;
-	int c, state = Run;
-
-	/* one-time setup: unmapped bytes translate to themselves */
-	if(!inited){
-		for(c=0; c<256; c++)
-			if(intab[c] == 0)
-				intab[c] = c;
-		inited = 1;
-	}
-	obreaklen = breaklen;
-	breaklen = 80;
-	addr = e.doff;
-	for(p=e.start; p<e.end; p++){
-		c = intab[(*p ^ (addr++>>1))&0xff];
-		switch(state){
-		case Run:
-			if(c == '%'){
-				t = tag;
-				state = Openper;
-				break;
-			}
-		/* also entered from Openper when '%' was not followed by '@' */
-		Putchar:
-			if(c == '\n')
-				outnl(0);
-			else if(c < Runeself)
-				outchar(c);
-			else
-				outrune(c);
-			break;
-
-		case Openper:
-			if(c == '@')
-				state = Openat;
-			else{
-				/* not a tag after all: emit the held '%' and this character */
-				outchar('%');
-				state = Run;
-				goto Putchar;
-			}
-			break;
-
-		case Openat:
-			if(c == '@')
-				state = Closeat;
-			else if(t < &tag[sizeof tag-1])
-				*t++ = c;	/* tag text beyond the buffer is dropped */
-			break;
-
-		case Closeat:
-			if(c == '%'){
-				/* complete %@tag@% seen */
-				*t = 0;
-				switch(cmd){
-				case 'h':
-					if(strcmp("EH", tag) == 0)
-						goto out;
-					break;
-				case 'r':
-					outprint("%%@%s@%%", tag);
-					break;
-				}
-				state = Run;
-			}else{
-				/* the '@' was part of the tag text: keep it and go on collecting */
-				if(t < &tag[sizeof tag-1])
-					*t++ = '@';
-				if(t < &tag[sizeof tag-1])
-					*t++ = c;
-				state = Openat;
-			}
-			break;
-		}
-	}
-out:
-	outnl(0);
-	breaklen = obreaklen;
-}
-
-/*
- * Scan the dictionary forward from fromoff, decoding each byte with the
- * same (offset>>1) XOR used by ahdprintentry, looking for "%@NL@%" and
- * then, later, "%@2@%".
- * Returns the offset at which "%@2@%" starts (running offset minus its
- * 5 bytes).  If input ends after only "%@NL@%" was seen, returns the
- * offset at which that 6-byte pattern started.  Returns -1 if the seek
- * fails or neither pattern was found.
- * NOTE(review): on a mismatch the scanner resets to state 0 without
- * re-testing the current byte against the first pattern byte, so a
- * pattern immediately preceded by '%' looks like it would be missed --
- * confirm whether the data can contain that.
- */
-int32_t
-ahdnextoff(int32_t fromoff)
-{
-	static char *patterns[] = { "%@NL@%", "%@2@%", 0 };
-	int c, k = 0, state = 0;
-	char *pat = patterns[0];
-	int32_t defoff = -1;
-
-	if(Bseek(bdict, fromoff, 0) < 0)
-		return -1;
-	while((c = Bgetc(bdict)) >= 0){
-		c ^= (fromoff++>>1)&0xff;
-		if(c != pat[state]){
-			state = 0;
-			continue;
-		}
-		if(pat[++state])
-			continue;
-		/* whole pattern matched: move on to the next one, if any */
-		if(pat = patterns[++k]){
-			state = 0;
-			defoff = fromoff-6;
-			continue;
-		}
-		return fromoff-5;
-	}
-	return defoff;
-}
-
-/* Pronunciation-key hook for this dictionary: it has none, so just say so on bout. */
-void
-ahdprintkey(void)
-{
-	Bprint(bout, "No pronunciations.\n");
-}

+ 0 - 29
sys/src/cmd/dict/canonind.awk

@@ -1,29 +0,0 @@
-# turn output of mkindex into form needed by dict
-# Each input line is an offset followed by tab-separated words.
-# A word containing "(x)" is emitted twice: once with the parenthesized
-# part removed and once with it kept (minus the parentheses).
-# The lines are collected in the scratch file "junk", sorted, and printed.
-BEGIN {
-	if(ARGC != 2) {
-		print "Usage: awk -F'	' -f canonind.awk rawindex > index"
-		exit 1
-	}
-	file = ARGV[1]
-	ARGV[1] = ""
-	while ((getline < file) > 0) {
-		for(i = 2; i <= NF; i++) {
-			w = $i
-			if(length(w) == 0)
-				continue
-			b = index(w, "(")
-			e = index(w, ")")
-			if(b && e && b < e) {
-				w1 = substr(w, 1, b-1)
-				w2 = substr(w, b+1, e-b-1)
-				w3 =  substr(w, e+1)
-				printf "%s%s\t%d\n", w1, w3, $1 > "junk"
-				printf "%s%s%s\t%d\n", w1, w2, w3, $1 > "junk"
-			} else
-				printf "%s\t%d\n", w, $1 > "junk"
-		}
-	}
-	# close the scratch file so everything written to it is flushed
-	# before sort reads it back
-	close("junk")
-	system("sort -u -t'	' +0f -1 +0 -1 +1n -2 < junk")
-	system("rm junk")
-	exit 0
-}

+ 0 - 56
sys/src/cmd/dict/comfix.awk

@@ -1,56 +0,0 @@
-# when raw index has a lot of entries like
-# 1578324	problematico, a, ci, che
-# apply this algorithm:
-#  treat things after comma as suffixes
-#  for each suffix:
-#      if single letter, replace last letter
-#      else search backwards for beginning of suffix
-#      and if it leads to an old suffix of approximately
-#      the same length, put replace that suffix
-# This will still leave some commas to fix by hand
-# Usage: awk -F'	' -f comfix.awk rawindex > newrawindex
-
-# anything that is not "offset<tab>words" passes through untouched
-NF != 2 {
-	print $0
-	next
-}
-
-# no comma in the word field: nothing to expand
-index($2, ",") == 0 {
-	print $0
-	next
-}
-
-# the first piece is the base word; each later piece is a suffix that is
-# grafted onto the base when matchsuflen finds a place for it, and is
-# otherwise left joined to the base with ", " for fixing by hand
-{
-	n = split($2, a, /,[ ]*/)
-	w = a[1]
-	printf "%s\t%s\n", $1, w
-	for(i = 2; i <= n; i++) {
-		suf = a[i]
-		m = matchsuflen(w, suf)
-		if(m)
-			printf "%s\t%s\n", $1, substr(w, 1, length(w)-m) suf
-		else
-			printf "%s\t%s\n", $1, w ", " suf
-	}
-}
-
-# Return how many trailing characters of w the suffix suf should replace,
-# or 0 when no plausible replacement point exists.
-# A one-letter suffix always replaces the last letter.  Otherwise the
-# nearest occurrence (from the end of w) of suf's first letter marks the
-# old suffix, which is accepted only if its length is within 3 of suf's.
-function matchsuflen(w, suf,		wlen,suflen,c,pat,k,d)
-{
-	suflen = length(suf)
-	if(suflen == 1)
-		return 1
-	wlen = length(w)
-	c = substr(suf, 1, 1)
-	k = 1
-	while(k <= wlen && substr(w, wlen-k+1, 1) != c)
-		k++
-	if(k > wlen)
-		return 0
-	d = (k > suflen) ? k-suflen : suflen-k
-	return (d > 3) ? 0 : k
-}

+ 0 - 684
sys/src/cmd/dict/dict.c

@@ -1,684 +0,0 @@
-/*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
-
-#include <u.h>
-#include <libc.h>
-#include <bio.h>
-#include <regexp.h>
-#include <ctype.h>
-#include "dict.h"
-
-/*
- * Assumed index file structure: lines of form
- * 	[^\t]+\t[0-9]+
- * First field is key, second is byte offset into dictionary.
- * Should be sorted with args -u -t'	' +0f -1 +0 -1 +1n -2
- */
-/*
- * A match set: a list of dictionary byte offsets plus a cursor.
- * doff is declared with one element, but the structure is allocated
- * with room for maxn of them (sizeof(Addr) plus maxn-1 extra slots).
- */
-typedef struct Addr Addr;
-
-struct Addr {
-	int	n;		/* number of offsets */
-	int	cur;		/* current position within doff array */
-	int	maxn;		/* actual current size of doff array */
-	uint32_t	doff[1];	/* doff[maxn], with 0..n-1 significant */
-};
-
-Biobuf	binbuf;
-Biobuf	boutbuf;
-Biobuf	*bin = &binbuf;		/* user cmd input */
-Biobuf	*bout = &boutbuf;	/* output */
-Biobuf	*bdict;			/* dictionary */
-Biobuf	*bindex;		/* index file */
-int32_t	indextop;		/* index offset at end of file */
-int	lastcmd;		/* last executed command */
-Addr	*dot;			/* "current" address */
-Dict	*dict;			/* current dictionary */
-int	linelen;
-int	breaklen = 60;
-int	outinhibit;
-int	debug;
-
-void	execcmd(int);
-int	getpref(char*, Rune*);
-Entry	getentry(int);
-int	getfield(Rune*);
-int32_t	locate(Rune*);
-int	parseaddr(char*, char**);
-int	parsecmd(char*);
-int	search(char*, int);
-int32_t	seeknextline(Biobuf*, int32_t);
-void	setdotnext(void);
-void	setdotprev(void);
-void	sortaddr(Addr*);
-void	usage(void);
-
-enum {
-	Plen=300,	/* max length of a search pattern */
-	Fieldlen=200,	/* max length of an index field */
-	Aslots=10,	/* initial number of slots in an address */
-};
-
-/*
- * Entry point.  The default dictionary is the first entry of dicts
- * whose data and index files are both accessible; -d overrides it.
- * With -k the pronunciation key is printed and the program exits.
- * Otherwise one command is run (from -c, or "/pattern/P" built from a
- * pattern argument), or commands are read from standard input, each
- * prompted with "*", until end of input.
- */
-void
-main(int argc, char **argv)
-{
-	int i, cmd, kflag;
-	char *line, *p;
-
-	Binit(&binbuf, 0, OREAD);
-	Binit(&boutbuf, 1, OWRITE);
-	kflag = 0;
-	line = 0;
-	dict = 0;
-	for(i=0; dicts[i].name; i++){
-		if(access(dicts[i].path, 0)>=0 && access(dicts[i].indexpath, 0)>=0){
-			dict = &dicts[i];
-			break;
-		}
-	}
-	ARGBEGIN {
-		case 'd':
-			p = ARGF();
-			dict = 0;
-			if(p) {
-				for(i=0; dicts[i].name; i++)
-					if(strcmp(p, dicts[i].name)==0) {
-						dict = &dicts[i];
-						break;
-					}
-			}
-			if(!dict)
-				usage();
-			break;
-		case 'c':
-			line = ARGF();
-			if(!line)
-				usage();
-			break;
-		case 'k':
-			kflag++;
-			break;
-		case 'D':
-			debug++;
-			break;
-		default:
-			usage();
-	ARGEND }
-	if(dict == 0){
-		err("no dictionaries present on this system");
-		exits("nodict");
-	}
-
-	if(kflag) {
-		(*dict->printkey)();
-		exits(0);
-	}
-	if(argc > 1)
-		usage();
-	else if(argc == 1) {
-		if(line)
-			usage();
-		p = argv[0];
-		/* room for '/' p '/' 'P' '\n' and the terminating NUL */
-		line = malloc(strlen(p)+5);
-		if(!line) {
-			err("out of memory");
-			exits("nomem");
-		}
-		sprint(line, "/%s/P\n", p);
-	}
-	bdict = Bopen(dict->path, OREAD);
-	if(!bdict) {
-		err("can't open dictionary %s", dict->path);
-		exits("nodict");
-	}
-	bindex = Bopen(dict->indexpath, OREAD);
-	if(!bindex) {
-		err("can't open index %s", dict->indexpath);
-		exits("noindex");
-	}
-	indextop = Bseek(bindex, 0L, 2);
-
-	/* size the extra slots by doff's own element type */
-	dot = malloc(sizeof(Addr)+(Aslots-1)*sizeof(dot->doff[0]));
-	if(!dot) {
-		err("out of memory");
-		exits("nomem");
-	}
-	dot->n = 0;
-	dot->cur = 0;
-	dot->maxn = Aslots;
-	lastcmd = 0;
-
-	if(line) {
-		cmd = parsecmd(line);
-		if(cmd)
-			execcmd(cmd);
-	} else {
-		for(;;) {
-			Bprint(bout, "*");
-			Bflush(bout);
-			line = Brdline(bin, '\n');
-			linelen = 0;
-			if(!line)
-				break;
-			cmd = parsecmd(line);
-			if(cmd) {
-				execcmd(cmd);
-				lastcmd = cmd;
-			}
-		}
-	}
-	exits(0);
-}
-
-/*
- * Print the synopsis and the table of known dictionaries on bout,
- * bracketing each one whose data or index file is not accessible,
- * then exit with status "usage".
- */
-void
-usage(void)
-{
-	Dict *d;
-	int missing;
-
-	Bprint(bout, "Usage: %s [-d dict] [-k] [-c cmd] [word]\n", argv0);
-	Bprint(bout, "dictionaries (brackets mark dictionaries not present on this system):\n");
-	for(d = dicts; d->name; d++){
-		missing = access(d->path, 0)<0 || access(d->indexpath, 0)<0;
-		Bprint(bout, "   %s%s\t%s%s\n", missing ? "[" : "", d->name, d->desc, missing ? "]" : "");
-	}
-	exits("usage");
-}
-
-/*
- * Parse one command line: an optional address (applied to dot by
- * parseaddr) followed by a command letter a, h, p or r in either case.
- * Returns the command to execute, or 0 if the address or the command
- * is bad.
- * An empty command -- a newline, or the end of the string for a -c
- * argument that has no trailing newline -- defaults from lastcmd:
- * H for the first command, p after H, otherwise lastcmd again.
- */
-int
-parsecmd(char *line)
-{
-	char *e;
-	int cmd, ans;
-
-	if(parseaddr(line, &e) < 0)
-		return 0;
-	line = e;
-	cmd = *line;
-	ans = cmd;
-	if(isupper(cmd))
-		cmd = tolower(cmd);
-	/* end of string counts as an empty command, just like newline */
-	if(cmd == 0)
-		cmd = '\n';
-	if(!(cmd == 'a' || cmd == 'h' || cmd == 'p' || cmd == 'r' ||
-	     cmd == '\n')) {
-		err("unknown command %c", cmd);
-		return 0;
-	}
-	if(cmd == '\n')
-		switch(lastcmd) {
-			case 0:	ans = 'H'; break;
-			case 'H':	ans = 'p'; break;
-			default :	ans = lastcmd; break;
-		}
-	else if(line[1] != '\n' && line[1] != 0)
-		err("extra stuff after command %c ignored", cmd);
-	return ans;
-}
-
-/*
- * Run command letter cmd against dot.  An upper case letter applies
- * the lower case command to every entry of the match set, prefixing
- * each with its number; a lower case letter applies it to the current
- * entry only.  Afterwards dot->cur is the entry following the last one
- * processed, wrapping to 0 at the end of the set.
- */
-void
-execcmd(int cmd)
-{
-	Entry e;
-	int cur, doall;
-
-	doall = 0;
-	cur = dot->cur;
-	if(isupper(cmd)) {
-		doall = 1;
-		cur = 0;
-		cmd = tolower(cmd);
-	}
-
-	if(debug && doall && cmd == 'a')
-		Bprint(bout, "%d entries, cur=%d\n", dot->n, cur+1);
-	while(cur < dot->n) {
-		if(doall) {
-			Bprint(bout, "%d\t", cur+1);
-			linelen += 4 + (cur >= 10);
-		}
-		if(cmd == 'a')
-			Bprint(bout, "#%lu\n", dot->doff[cur]);
-		else if(cmd == 'h' || cmd == 'p' || cmd == 'r') {
-			e = getentry(cur);
-			(*dict->printentry)(e, cmd);
-		}
-		cur++;
-		if(!doall)
-			break;
-		/* separate full entries with a blank line when listing the set */
-		if(cmd == 'p' || cmd == 'r') {
-			Bputc(bout, '\n');
-			linelen = 0;
-		}
-	}
-	if(cur >= dot->n)
-		cur = 0;
-	dot->cur = cur;
-}
-
-/*
- * Address syntax: ('.' | '/' re '/' | '!' re '!' | number | '#' number) ('+' | '-')*
- * Answer goes in dot.
- * Return -1 if address starts, but get error.
- * Return 0 if no address, 1 if an address was parsed.
- * In the 0 and 1 cases *eptr is set to the first unconsumed character.
- * A pattern ends at its closing delimiter, at a newline, or at the end
- * of the string (a -c argument carries no trailing newline).
- * A number outside the current match set is reported and ignored.
- */
-int
-parseaddr(char *line, char **eptr)
-{
-	int delim, plen;
-	uint32_t v;
-	char *e;
-	char pat[Plen];
-
-	if(*line == '/' || *line == '!') {
-		/* anchored regular expression match; '!' means no folding */
-		delim = *line;
-		e = strpbrk(line+1, delim == '/' ? "/\n" : "!\n");
-		if(e == 0)
-			e = line + strlen(line);	/* unterminated: pattern runs to end of string */
-		plen = e-line-1;
-		if(plen >= Plen-3) {
-			err("pattern too big");
-			return -1;
-		}
-		pat[0] = '^';
-		memcpy(pat+1, line+1, plen);
-		pat[plen+1] = '$';
-		pat[plen+2] = 0;
-		/* step over a closing delimiter, but never over '\n' or NUL */
-		if(*e == delim)
-			line = e+1;
-		else
-			line = e;
-		if(!search(pat, delim == '/')) {
-			err("pattern not found");
-			return -1;
-		}
-	} else if(*line == '#') {
-		/* absolute byte offset into dictionary */
-		line++;
-		if(!isdigit(*line))
-			return -1;
-		v = strtoul(line, &e, 10);
-		line = e;
-		dot->doff[0] = v;
-		dot->n = 1;
-		dot->cur = 0;
-	} else if(isdigit(*line)) {
-		/* 1-based position within the current match set */
-		v = strtoul(line, &e, 10);
-		line = e;
-		if(v < 1 || v > dot->n)
-			err(".%d not in range [1,%d], ignored",
-				v, dot->n);
-		else
-			dot->cur = v-1;
-	} else if(*line == '.') {
-		line++;
-	} else {
-		*eptr = line;
-		return 0;
-	}
-	while(*line == '+' || *line == '-') {
-		if(*line == '+')
-			setdotnext();
-		else
-			setdotprev();
-		line++;
-	}
-	*eptr = line;
-	return 1;
-}
-
-/*
- * Index file is sorted by folded field1.
- * Method: find pre, a folded prefix of r.e. pat,
- * and then low = offset to beginning of
- * line in index file where first match of prefix occurs.
- * Then go through index until prefix no longer matches,
- * adding each line that matches real pattern to dot.
- * Finally, sort dot offsets (uniquing).
- * We know pat len < Plen, and that it is surrounded by ^..$
- * Returns the number of offsets left in dot, or 0 if the prefix
- * could not be located in the index.
- */
-int
-search(char *pat, int dofold)
-{
-	int needre, prelen, match, n;
-	Reprog *re;
-	int32_t ioff, v;
-	Rune pre[Plen];
-	Rune lit[Plen];
-	Rune entry[Fieldlen];
-	char fpat[Plen];
-
-	/* pat+1 skips the leading '^' */
-	prelen = getpref(pat+1, pre);
-	if(pat[prelen+1] == 0 || pat[prelen+1] == '$') {
-		/* the prefix runs up to a '$' or the end: compare literally */
-		runescpy(lit, pre);
-		if(dofold)
-			fold(lit);
-		needre = 0;
-		SET(re);	/* re is never used when needre is 0 */
-	} else {
-		needre = 1;
-		if(dofold) {
-			foldre(fpat, pat);
-			re = regcomp(fpat);
-		} else
-			re = regcomp(pat);
-	}
-	/* the index is ordered by folded key, so always locate with a folded prefix */
-	fold(pre);
-	ioff = locate(pre);
-	if(ioff < 0)
-		return 0;
-	dot->n = 0;
-	Bseek(bindex, ioff, 0);
-	for(;;) {
-		if(!getfield(entry))
-			break;
-		if(dofold)
-			fold(entry);
-		if(needre)
-			match = rregexec(re, entry, 0, 0);
-		else
-			match = (acomp(lit, entry) == 0);
-		if(match) {
-			/* second field of the line is the dictionary offset */
-			if(!getfield(entry))
-				break;
-			v = runetol(entry);
-			/* double the offset array when it is full */
-			if(dot->n >= dot->maxn) {
-				n = 2*dot->maxn;
-				dot = realloc(dot,
-					sizeof(Addr)+(n-1)*sizeof(int32_t));
-				if(!dot) {
-					err("out of memory");
-					exits("nomem");
-				}
-				dot->maxn = n;
-			}
-			dot->doff[dot->n++] = v;
-		} else {
-			if(!dofold)
-				fold(entry);
-			/*
-			 * NOTE(review): the stop test relies on acomp's return
-			 * convention (locate's switch handles -2..2) -- confirm
-			 * the meaning of -1 versus -2 in acomp itself.
-			 */
-			if(*pre) {
-				n = acomp(pre, entry);
-				if(n < -1 || (!needre && n < 0))
-					break;
-			}
-			/* get to next index entry */
-			if(!getfield(entry))
-				break;
-		}
-	}
-	sortaddr(dot);
-	dot->cur = 0;
-	return dot->n;
-}
-
-/*
- * Return offset in index file of first line whose folded
- * first field has pre as a prefix.  -1 if none found.
- * An empty pre matches from the start of the index (offset 0).
- */
-int32_t
-locate(Rune *pre)
-{
-	int32_t top, bot, mid;
-	Rune entry[Fieldlen];
-
-	if(*pre == 0)
-		return 0;
-	bot = 0;
-	top = indextop;
-	if(debug>1)
-		fprint(2, "locate looking for prefix %S\n", pre);
-	/* binary search on byte offsets, snapping mid to the start of the following line */
-	for(;;) {
-		/*
-		 * Loop invariant: foldkey(bot) < pre <= foldkey(top)
-		 * and bot < top, and bot,top point at beginning of lines
-		 */
-		mid = (top+bot) / 2;
-		mid = seeknextline(bindex, mid);
-		if(debug > 1)
-			fprint(2, "bot=%ld, mid=%ld->%ld, top=%ld\n",
-				bot, (top+bot) / 2, mid, top);
-		/* no line starts strictly between bot and top: narrowing is done */
-		if(mid == top || !getfield(entry))
-			break;
-		if(debug > 1)
-			fprint(2, "key=%S\n", entry);
-		/*
-		 * here mid is strictly between bot and top
-		 */
-		fold(entry);
-		if(acomp(pre, entry) <= 0)
-			top = mid;
-		else
-			bot = mid;
-	}
-	/*
-	 * bot < top, but they don't necessarily point at successive lines
-	 * Use linear search from bot to find first line that pre is a
-	 * prefix of
-	 */
-	while((bot = seeknextline(bindex, bot)) <= top) {
-		if(!getfield(entry))
-			return -1;
-		if(debug > 1)
-			fprint(2, "key=%S\n", entry);
-		fold(entry);
-		/* NOTE(review): presumably acomp's -1 means "pre is a prefix of entry" -- confirm in acomp */
-		switch(acomp(pre, entry)) {
-		case -2:
-			return -1;
-		case -1:
-		case 0:
-			return bot;
-		case 1:
-		case 2:
-			continue;
-		}
-	}
-	return -1;
-
-}
-
-/*
- * Get prefix of non re-metacharacters, runified, into pre,
- * and return length (in bytes of pat consumed).
- * pre is always left NUL-terminated, including when the pattern ends
- * without reaching a metacharacter.
- * A backslash and the character it escapes are both stored in pre;
- * a backslash at the very end of pat is stored alone and ends the scan.
- */
-int
-getpref(char *pat, Rune *pre)
-{
-	int n, r;
-	char *p;
-
-	p = pat;
-	while(*p) {
-		n = chartorune(pre, p);
-		r = *pre;
-		switch(r) {
-		case L'.': case L'*': case L'+': case L'?':
-		case L'[': case L']': case L'(': case ')':
-		case L'|': case L'^': case L'$':
-			*pre = 0;
-			return p-pat;
-		case L'\\':
-			p += n;
-			pre++;
-			if(*p == 0)
-				break;	/* dangling backslash: do not step past the NUL */
-			p += chartorune(pre, p);
-			pre++;
-			break;
-		default:
-			p += n;
-			pre++;
-		}
-	}
-	/* ran off the end of pat without seeing a metacharacter */
-	*pre = 0;
-	return p-pat;
-}
-
-/*
- * Seek b to byte offset off, read runes up to and including the next
- * newline (or until a read fails), and return the offset reached.
- */
-int32_t
-seeknextline(Biobuf *b, int32_t off)
-{
-	int32_t r;
-
-	Bseek(b, off, 0);
-	for(;;) {
-		r = Bgetrune(b);
-		if(r < 0 || r == '\n')
-			break;
-	}
-	return Boffset(b);
-}
-
-/*
- * Read the next tab- or newline-terminated field of the index file
- * into rp, which must have room for Fieldlen runes, and terminate it.
- * Returns 1 on success.  Returns 0 if a read fails before the field
- * ends, or (after reporting "word too long") if Fieldlen runes are
- * read without finding a terminator.
- */
-int
-getfield(Rune *rp)
-{
-	int32_t c;
-	Rune *end;
-
-	for(end = rp+Fieldlen; rp < end; rp++) {
-		c = Bgetrune(bindex);
-		if(c < 0)
-			return 0;
-		if(c == '\t' || c == '\n') {
-			*rp = L'\0';
-			return 1;
-		}
-		*rp = c;
-	}
-	err("word too long");
-	return 0;
-}
-
-/*
- * A compare longs function suitable for qsort: returns -1, 0 or 1.
- * The values are compared directly rather than subtracted, because the
- * difference of two int32_t values can overflow and give the wrong sign.
- */
-static int
-longcmp(const void *av, const void *bv)
-{
-	const int32_t *a, *b;
-
-	a = av;
-	b = bv;
-
-	if(*a < *b)
-		return -1;
-	if(*a > *b)
-		return 1;
-	return 0;
-}
-
-/*
- * Sort the offsets of a with longcmp and squeeze out duplicates,
- * updating a->n to the number of distinct offsets.
- */
-void
-sortaddr(Addr *a)
-{
-	int last, j;
-
-	if(a->n <= 1)
-		return;
-
-	qsort(a->doff, a->n, sizeof(int32_t), longcmp);
-
-	/* keep an offset only when it differs from the last one kept */
-	last = 0;
-	for(j = 1; j < a->n; j++)
-		if(a->doff[j] != a->doff[last])
-			a->doff[++last] = a->doff[j];
-	a->n = last+1;
-}
-
-/*
- * Read the i'th entry of dot from the dictionary.  The entry spans from
- * its offset to the offset reported by the dictionary's nextoff hook.
- * The returned Entry points into a static buffer that is reused (and
- * may be reallocated) by the next call; the text is NUL-terminated.
- * If nextoff fails, start and end are both 0.  If the read fails, an
- * empty entry is returned.
- */
-Entry
-getentry(int i)
-{
-	int32_t b, e, n;
-	static Entry ans;
-	static int anslen = 0;
-
-	b = dot->doff[i];
-	e = (*dict->nextoff)(b+1);
-	ans.doff = b;
-	if(e < 0) {
-		err("couldn't seek to entry");
-		ans.start = 0;
-		ans.end = 0;
-	} else {
-		n = e-b;
-		/* grow the buffer to hold the entry plus its terminating NUL */
-		if(n+1 > anslen) {
-			ans.start = realloc(ans.start, n+1);
-			if(!ans.start) {
-				err("out of memory");
-				exits("nomem");
-			}
-			anslen = n+1;
-		}
-		Bseek(bdict, b, 0);
-		n = Bread(bdict, ans.start, n);
-		/* a failed read must not move end in front of start */
-		if(n < 0) {
-			err("couldn't read entry");
-			n = 0;
-		}
-		ans.end = ans.start + n;
-		*ans.end = 0;
-	}
-	return ans;
-}
-
-/*
- * Replace dot with the single dictionary entry that follows the
- * current one (as found by the dictionary's nextoff hook).
- * Does nothing if there is no current entry; reports an error and
- * leaves dot alone if no following entry is found.
- */
-void
-setdotnext(void)
-{
-	int32_t b;
-
-	/* same guard as setdotprev: doff[cur] is meaningless for an empty set */
-	if(dot->cur < 0 || dot->cur >= dot->n)
-		return;
-	b = (*dict->nextoff)(dot->doff[dot->cur]+1);
-	if(b < 0) {
-		err("couldn't find a next entry");
-		return;
-	}
-	dot->doff[0] = b;
-	dot->n = 1;
-	dot->cur = 0;
-}
-
-/*
- * Replace dot with the single dictionary entry that precedes the
- * current one.  There is no backward scan, so back up tryback bytes
- * and walk forward with the dictionary's nextoff hook, remembering the
- * last entry start seen before the current offset; if none is found,
- * double tryback and retry.
- * Does nothing if there is no current entry; reports an error if the
- * search has already reached the start of the dictionary.
- */
-void
-setdotprev(void)
-{
-	int tryback;
-	int32_t here, last, p;
-
-	if(dot->cur < 0 || dot->cur >= dot->n)
-		return;
-	tryback = 2000;
-	here = dot->doff[dot->cur];
-	last = 0;
-	/* last == 0 doubles as "no earlier entry found yet" */
-	while(last == 0) {
-		p = here - tryback;
-		if(p < 0)
-			p = 0;
-		/* walk forward entry by entry until reaching the current one */
-		for(;;) {
-			p = (*dict->nextoff)(p+1);
-			if(p < 0)
-				return; /* shouldn't happen */
-			if(p >= here)
-				break;
-			last = p;
-		}
-		if(!last) {
-			/* already searched from offset 0: nothing precedes here */
-			if(here - tryback < 0) {
-				err("can't find a previous entry");
-				return;
-			}
-			tryback = 2*tryback;
-		}
-	}
-	dot->doff[0] = last;
-	dot->n = 1;
-	dot->cur = 0;
-}

+ 0 - 172
sys/src/cmd/dict/dict.h

@@ -1,172 +0,0 @@
-/*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
-
-/* Runes for special purposes (0xe800-0xfdff is Private Use Area) */
-enum {	NONE=0xe800,	/* Emit nothing */
-	TAGS,		/* Start of tag */
-	TAGE,		/* End of tag */
-	SPCS,		/* Start of special character name */
-	PAR,		/* Newline, indent */
-	LIGS,		/* Start of ligature codes */
-	LACU=LIGS,	/* Acute (´) ligatures */
-	LGRV,		/* Grave (ˋ) ligatures */
-	LUML,		/* Umlaut (¨) ligatures */
-	LCED,		/* Cedilla (¸) ligatures */
-	LTIL,		/* Tilde (˜) ligatures */
-	LBRV,		/* Breve (˘) ligatures */
-	LRNG,		/* Ring (˚) ligatures */
-	LDOT,		/* Dot (˙) ligatures */
-	LDTB,		/* Dot below (.) ligatures */
-	LFRN,		/* Frown (⌢) ligatures */
-	LFRB,		/* Frown below (̯) ligatures */
-	LOGO,		/* Ogonek (˛) ligatures */
-	LMAC,		/* Macron (¯) ligatures */
-	LHCK,		/* Hacek (ˇ) ligatures */
-	LASP,		/* Asper (ʽ) ligatures */
-	LLEN,		/* Lenis (ʼ) ligatures */
-	LBRB,		/* Breve below (̮) ligatures */
-	LIGE,		/* End of ligature codes */
-	MULTI,		/* Start of multi-rune codes */
-	MAAS=MULTI,	/* ʽα */
-	MALN,		/* ʼα */
-	MAND,		/* and */
-	MAOQ,		/* a/q */
-	MBRA,		/* <| */
-	MDD,		/* .. */
-	MDDD,		/* ... */
-	MEAS,		/* ʽε */
-	MELN,		/* ʼε */
-	MEMM,		/* —— */
-	MHAS,		/* ʽη */
-	MHLN,		/* ʼη */
-	MIAS,		/* ʽι */
-	MILN,		/* ʼι */
-	MLCT,		/* ct */
-	MLFF,		/* ff */
-	MLFFI,		/* ffi */
-	MLFFL,		/* ffl */
-	MLFL,		/* fl */
-	MLFI,		/* fi */
-	MLLS,		/* ll with swing */
-	MLST,		/* st */
-	MOAS,		/* ʽο */
-	MOLN,		/* ʼο */
-	MOR,		/* or */
-	MRAS,		/* ʽρ */
-	MRLN,		/* ʼρ */
-	MTT,		/* ~~ */
-	MUAS,		/* ʽυ */
-	MULN,		/* ʼυ */
-	MWAS,		/* ʽω */
-	MWLN,		/* ʼω */
-	MOE,		/* oe */
-	MES,		/* em space */
-	MULTIE,		/* End of multi-rune codes */
-};
-#define Nligs (LIGE-LIGS)
-#define Nmulti (MULTIE-MULTI)
-
-typedef struct Entry Entry;
-typedef struct Assoc Assoc;
-typedef struct Nassoc Nassoc;
-typedef struct Dict Dict;
-
-struct Entry {
-	char	*start;		/* entry starts at start */
-	char	*end;		/* and finishes just before end */
-	long	doff;		/* dictionary offset (for debugging) */
-};
-
-struct Assoc {
-	char	*key;
-	long	val;
-};
-
-struct Nassoc {
-	long	key;
-	long	val;
-};
-
-/*
- * One supported dictionary: its name and description, where its data
- * and index live, and the format-specific routines used to step
- * through it and print it.
- * NOTE(review): nextoff is declared with long here, while the
- * definitions visible in movie.c and pcollins.c take and return
- * int32_t; the two only agree where long is 32 bits wide -- confirm
- * for the target compiler.
- */
-struct Dict {
-	char	*name;			/* dictionary name */
-	char	*desc;			/* description */
-	char	*path;			/* path to dictionary data */
-	char	*indexpath;		/* path to index data */
-	long	(*nextoff)(long);	/* function to find next entry offset from arg */
-	void	(*printentry)(Entry, int); /* function to print entry */
-	void	(*printkey)(void);	/* function to print pronunciation key */
-};
-
-int	acomp(Rune*, Rune*);
-Rune	*changett(Rune *, Rune *, int);
-void	err(char*, ...);
-void	fold(Rune *);
-void	foldre(char*, char*);
-Rune	liglookup(Rune, Rune);
-long	lookassoc(Assoc*, int, char*);
-long	looknassoc(Nassoc*, int, long);
-void	outprint(char*, ...);
-void	outrune(long);
-void	outrunes(Rune *);
-void	outchar(int);
-void	outchars(char *);
-void	outnl(int);
-void	outpiece(char *, char *);
-void	runescpy(Rune*, Rune*);
-long	runetol(Rune*);
-
-long	oednextoff(long);
-void	oedprintentry(Entry, int);
-void	oedprintkey(void);
-long	ahdnextoff(long);
-void	ahdprintentry(Entry, int);
-void	ahdprintkey(void);
-long	pcollnextoff(long);
-void	pcollprintentry(Entry, int);
-void	pcollprintkey(void);
-long	pcollgnextoff(long);
-void	pcollgprintentry(Entry, int);
-void	pcollgprintkey(void);
-long	movienextoff(long);
-void	movieprintentry(Entry, int);
-void	movieprintkey(void);
-long	pgwnextoff(long);
-void	pgwprintentry(Entry,int);
-void	pgwprintkey(void);
-void	rogetprintentry(Entry, int);
-long	rogetnextoff(long);
-void	rogetprintkey(void);
-long	slangnextoff(long);
-void	slangprintentry(Entry, int);
-void	slangprintkey(void);
-long	robertnextoff(long);
-void	robertindexentry(Entry, int);
-void	robertprintkey(void);
-long	robertnextflex(long);
-void	robertflexentry(Entry, int);
-long	simplenextoff(long);
-void	simpleprintentry(Entry, int);
-void	simpleprintkey(void);
-long	thesnextoff(long);
-void	thesprintentry(Entry, int);
-void	thesprintkey(void);
-long	worldnextoff(long);
-void	worldprintentry(Entry, int);
-void	worldprintkey(void);
-
-extern Biobuf	*bdict;
-extern Biobuf	*bout;
-extern int	linelen;
-extern int	breaklen;
-extern int	outinhibit;
-extern int	debug;
-extern Rune	*multitab[];
-extern Dict	dicts[];
-
-#define asize(a) (sizeof (a)/sizeof(a[0]))

+ 0 - 15
sys/src/cmd/dict/egfix

@@ -1,15 +0,0 @@
-#!/bin/rc
-
-sed '
-	s/[ 	]+$//
-	/	/!d
-	/, /{; h; s/,.*//; p; g; s/	.*, /	/; }
-' $1 |
-sed '
-	/\(/{; h; s/\([^)]+\)//; p; g; s/[()]//g; }
-' |
-sed '
-	s/	 +/	/
-	s/[ 	]+$//
-	s/  +/ /g
-'

+ 0 - 8
sys/src/cmd/dict/egfix2

@@ -1,8 +0,0 @@
-#!/bin/rc
-
-awk '
-BEGIN	{ FS = "	|, " }
-	{ for(i=2; i<=NF; i++)print $i "	" $1 }
-' $1 |
-tr A-Z a-z |
-sort -u -t'	' +0f -1 +0 -1 +1n -2

+ 0 - 123
sys/src/cmd/dict/fromemf

@@ -1,123 +0,0 @@
-From emf Tue Sep 19 10:41:30 EDT 1989
-Here are the IPA equivalents of the Collins code for German-English.
-RE: Avant-garde, it is listed as exemplifying a nasal vowel in the
-Collins pronunciation key, and occurs in the English-German dictionary,
-where it is translated into German as Avantgarde.  However, it does
-not appear in the German-English dictionary.
-
-I'll drop by with your hardcopy dictionary today or tomorrow.
-Thanks.
-
-IPA		COLLINS		EXAMPLE
-
-[']		349		H Salon
-				P za\N'349'lo^~:
-
-[a]		a		H ab
-				P ap
-
-upsilon		348  		H Abbau 
-				P \N'349'apba\fp\N'348'\f1
-
-schwa		343  		H aalen
-				P \N'349'a:l\fp\N'343'\f1n
-
-[i:]		i:           	H abbiegen
-				P \N'349'apbi:g\fp\N'343'\f1n
-
-[I]		351  		H Abbild
-				P \N'349'apb\fp\N'351'\f1lt
-
-turned script a 356		H abdominal 
-				P \x91b\N'349'd\fp\N'356'\f1m\fp\N'351'\f1nl
-
-open o		341		H abdrosseln
-				P \N'349'apdr\fp\N'341'\f1s\fp\N'343'\f1ln
-
-upsilon		348		H Abbau
-				P \N'349'apba\fp\N'348'\f1
-
-[b]		b
-[d]		d
-[f]		f
-[g]		g
-[h]		h
-[j]		j
-[k]		k	
-[l]		l
-[m]		m
-[n]		n
-
-eng		384		H Abgang
-				P \N'349'apga\fp\N'384'\f1
-
-[p]		p
-[s]		s
-
-esh		346		H abgespannt
-				P \N'349'apg\fp\N'343'\N'346'\f1pant
-
-[t]		t
-[v]		v
-[x]		x
-[z]		z
-
-yogh		345		 H azure
-				\N'349'e\fp\N'351'\f1\fp\N'345'\f1\fp\N'343'\f1*
-
-ash		x91		H acrobat
-				P \N'349'\x91kr\fp\N'343'\f1b\x91t
-
-script a:	354		H almond
-				P \N'349'\fp\N'354'\f1:m\fp\N'343'\f1nd
-
-[e]		e
-
-rvrsd epsilon:	352		H absurd
-				P \fp\N'343'\f1b\N'349's\fp\N'352'\f1:d
-
-[u:]		u:
-
-inverted v	342		H above
-				P \fp\N'343'\f1\fp\N'349'\f1b\fp\N'342'\f1v
-
-superscript tilde
-over
-  open o	o^~ 		H Salon
-				P za\N'349'lo^~:
-
-script a	
-lower case a
-epsilon
-o
-
-[w]		w
-
-theta		347 		H apathy
-				P \N'349'\x91p\fp\N'343'\f1\fp\N'347'
-				  \f1\fp\N'351'\f1
-
-
-eth		445 		H another
-				P \fp\N'343'\f1\N'349'n\fp\N'342'\f1\N'445'
-				  \fp\N'343'\f1*
-
-epsilon		340 		H aba^:ndern
-				P \N'349'ap'\fp\N'340'\f1nd\fp\N'343'\f1rn
-
-slashed o	x97 		H abgewo^:hnen
-				P \N'349'apg\fp\N'343'\f1v\x97:n\fp\N'343'\f1n
-
-o-e ligature 	(oe 		H abbro^:ckeln
-				P \N'349'apbr\(oek\fp\N'343'\f1ln
-
-small capital y	y 		H abbru^:hen
-				P \N'349'apbry:\fp\N'343'\f1n
-
-c cedilla	c^ 		H abbrechen
-				P \N'349'apbr\fp\N'340'\f1c^,\fp\N'343'\f1n
-
-
-
-
-

File diff suppressed because it is too large
+ 0 - 1117
sys/src/cmd/dict/gb2312.c


+ 0 - 23
sys/src/cmd/dict/gefix

@@ -1,23 +0,0 @@
-#!/bin/rc
-
-sed '
-	s/[ 	]+$//
-	/	/!d
-	s/\\N''349''//g
-	s/''//g
-	s/	-/	/
-	s/-$//
-	/\([^,) ]+(\)|$)/{; h; s///; p; g; s/\(//; s/\)//; }
-	/\(r, s\)$/{; s///; p; s/$/r/; p; s/r$/s/; }
-' $1 |
-sed '
-	/\([^,) ]+(\)|$)/{; h; s///; p; g; s/\(//; s/\)//; }
-	/\(r, s\)$/{; s///; p; s/$/r/; p; s/r$/s/; }
-' |
-sed '/ß/{; p; s/ß/ss/g; }' |
-awk '
-BEGIN	{ FS = "	|, " }
-	{ for(i=2; i<=NF; i++)print $i "	" $1 }
-' |
-tr A-Z a-z |
-sort -u -t'	' +0f -1 +0 -1 +1n -2

+ 0 - 8
sys/src/cmd/dict/getneeds

@@ -1,8 +0,0 @@
-#!/bin/rc
-# For each kind x, take the lines of $1 that contain " x ", order them
-# by field 5 and then numerically by field 4, keep the first line of
-# each run of equal field 5, and write the survivors to need$x sorted
-# numerically by field 3.
-# The stages are piped together, so no junk1..junk3 scratch files are
-# created and no "rm junk*" can remove unrelated files from the
-# current directory.
-for (x in spec tag aux status) {
-	grep ' '^$x^' ' $1 |
-	sort +4 -5 +3n -4 |
-	awk '{if ($5 != prev) print $0; prev = $5}' |
-	sort -n +2 -3 > need$x
-}

File diff suppressed because it is too large
+ 0 - 1068
sys/src/cmd/dict/jis208.c


+ 0 - 123
sys/src/cmd/dict/kuten.h

@@ -1,123 +0,0 @@
-/*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
-
-/*
-	following astonishing goo courtesy of kogure.
-*/
-/*
- * MicroSoft Kanji Encoding (SJIS) Transformation
- */
-
-/*
- * void
- * J2S(unsigned char *_h, unsigned char *_l)
- *	JIS X 208 to MS kanji transformation.
- *
- * Calling/Exit State:
- *	_h and _l should be in their valid range.
- *	No return value.
- *
- *	The prototype above is notional: this is a macro, and _h and _l
- *	are the byte lvalues themselves, rewritten in place.  Each is
- *	expanded more than once, so pass plain variables without side
- *	effects.  The expansion is a bare { } block, not an expression
- *	or a do/while statement.
- */
-#define J2S(_h, _l) { \
-	/* lower: 21-7e >> 40-9d,9e-fb >> 40-7e,(skip 7f),80-fc */ \
-	if (((_l) += (((_h)-- % 2) ? 0x1f : 0x7d)) > 0x7e) (_l)++; \
-	/* upper: 21-7e >> 81-af >> 81-9f,(skip a0-df),e0-ef */ \
-	if (((_h) = ((_h) / 2 + 0x71)) > 0x9f) (_h) += 0x40; \
-}
-
-/*
- * void
- * S2J(unsigned char *_h, unsigned char *_l)
- *	MS kanji to JIS X 208 transformation.
- *
- * Calling/Exit State:
- *	_h and _l should be in valid range.
- *	No return value.
- *
- *	The prototype above is notional: this is a macro, and _h and _l
- *	are the byte lvalues themselves, rewritten in place.  Each is
- *	expanded more than once, so pass plain variables without side
- *	effects.  The expansion is a bare { } block, not an expression
- *	or a do/while statement.
- */
-#define S2J(_h, _l) { \
-	/* lower: 40-7e,80-fc >> 21-5f,61-dd >> 21-7e,7f-dc */ \
-	if (((_l) -= 0x1f) > 0x60) (_l)--; \
-	/* upper: 81-9f,e0-ef >> 00-1e,5f-6e >> 00-2e >> 21-7d */ \
-	if (((_h) -= 0x81) > 0x5e) (_h) -= 0x40; (_h) *= 2, (_h) += 0x21; \
-	/* upper: ,21-7d >> ,22-7e ; lower: ,7f-dc >> ,21-7e */ \
-	if ((_l) > 0x7e) (_h)++, (_l) -= 0x5e; \
-}
-
-/*
- * int
- * ISJKANA(const unsigned char *_b)
- *	Tests given byte is in the range of JIS X 0201 katakana.
- *
- * Calling/Exit State:
- *	Returns 1 if it is, or 0 otherwise.
- */
-#define	ISJKANA(_b)	(0xa0 <= (_b) && (_b) < 0xe0)
-
-/*
- * int
- * CANS2JH(const unsigned char *_h)
- *	Tests given byte is in the range of valid first byte of MS
- *	kanji code; either acts as a subroutine of CANS2J() macro
- *	or can be used to parse MS kanji encoded strings.
- *
- * Calling/Exit State:
- *	Returns 1 if it is, or 0 otherwise.
- */
-#define CANS2JH(_h)	((0x81 <= (_h) && (_h) < 0xf0) && !ISJKANA(_h))
-
-/*
- * int
- * CANS2JL(const unsigned char *_l)
- *	Tests given byte is in the range of valid second byte of MS
- *	kanji code; acts as a subroutine of CANS2J() macro.
- *
- * Calling/Exit State:
- *	Returns 1 if it is, or 0 otherwise.
- */
-#define CANS2JL(_l)	(0x40 <= (_l) && (_l) < 0xfd && (_l) != 0x7f)
-
-/*
- * int
- * CANS2J(const unsigned char *_h, const unsigned char *_l)
- *	Tests given bytes form a MS kanji code point which can be
- *	transformed to a valid JIS X 208 code point.
- *
- * Calling/Exit State:
- *	Returns 1 if they are, or 0 otherwise.
- */
-#define CANS2J(_h, _l)  (CANS2JH(_h) && CANS2JL(_l))
-
-/*
- * int
- * CANJ2SB(const unsigned char *_b)
- *	Tests given bytes is in the range of valid 94 graphic
- *	character set; acts as a subroutine of CANJ2S() macro.
- *
- * Calling/Exit State:
- *	Returns 1 if it is, or 0 otherwise.
- */
-#define CANJ2SB(_b)	(0x21 <= (_b) && (_b) < 0x7f)
-
-/*
- * int
- * CANJ2S(const unsigned char *_h, const unsigned char *_l)
- *	Tests given bytes form valid JIS X 208 code points
- *	(which can be transformed to MS kanji).
- *
- * Calling/Exit State:
- *	Returns 1 if they are, or 0 otherwise.
- */
-#define CANJ2S(_h, _l)	(CANJ2SB(_h) && CANJ2SB(_l))
-
-#define		JIS208MAX	8407
-#define		GB2312MAX	8795
-#define		BIG5MAX		13973
-
-extern Rune tabjis208[JIS208MAX];	/* runes indexed by kuten */
-extern Rune tabgb2312[GB2312MAX];
-extern Rune tabbig5[BIG5MAX];

+ 0 - 23
sys/src/cmd/dict/mkfile

@@ -1,23 +0,0 @@
-# dict is linked from the driver (dict.$O), one object per dictionary
-# format (LFILES) and the shared utilities (utils.$O).
-</$objtype/mkfile
-
-TARG=dict
-LFILES=oed.$O ahd.$O pcollins.$O pcollinsg.$O movie.$O slang.$O robert.$O\
-	world.$O jis208.$O gb2312.$O thesaurus.$O simple.$O pgw.$O roget.$O
-
-OFILES=dict.$O\
-	$LFILES\
-	utils.$O
-
-HFILES=dict.h kuten.h
-
-BIN=/$objtype/bin
-
-UPDATE=\
-	mkfile\
-	$HFILES\
-	${OFILES:%.$O=%.c}\
-
-</sys/src/cmd/mkone
-
-# mkindex links the same per-dictionary objects and utils.$O with
-# mkindex.$O in place of dict.$O.
-mkindex: mkindex.$O $LFILES utils.$O
-	$LD $LDFLAGS -o $target $prereq

+ 0 - 115
sys/src/cmd/dict/mkindex.c

@@ -1,115 +0,0 @@
-/*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
-
-#include <u.h>
-#include <libc.h>
-#include <bio.h>
-#include "dict.h"
-
-/*
- * Use this to start making an index for a new dictionary.
- * Get the dictionary-specific nextoff and printentry(_,'h')
- * commands working, add a record to the dicts[] array below,
- * and run this program to get a list of offset,headword
- * pairs
- */
-Biobuf	boutbuf;
-Biobuf	*bdict;
-Biobuf	*bout = &boutbuf;
-int	linelen;
-int	breaklen = 2000;
-int	outinhibit;
-int	debug;
-
-Dict	*dict;	/* current dictionary */
-
-Entry	getentry(int32_t);
-
-/*
- * Write one "offset<tab>headword" line for every entry of the chosen
- * dictionary (the first in dicts[] unless -d name is given).
- * -D increments the debug level.
- */
-void
-main(int argc, char **argv)
-{
-	int i;
-	long a, ae;
-	char *p;
-	Entry e;
-
-	Binit(&boutbuf, 1, OWRITE);
-	dict = &dicts[0];
-	ARGBEGIN {
-		case 'd':
-			p = ARGF();
-			dict = 0;
-			if(p) {
-				for(i=0; dicts[i].name; i++)
-					if(strcmp(p, dicts[i].name)==0) {
-						dict = &dicts[i];
-						break;
-					}
-			}
-			if(!dict) {
-				err("unknown dictionary: %s", p);
-				exits("nodict");
-			}
-			break;
-		case 'D':
-			debug++;
-			break;
-	ARGEND }
-	USED(argc,argv);
-	bdict = Bopen(dict->path, OREAD);
-	/* check the open before the first use of bdict */
-	if(!bdict) {
-		err("can't open dictionary %s", dict->path);
-		exits("nodict");
-	}
-	ae = Bseek(bdict, 0, 2);
-	/*
-	 * A negative offset means nextoff found no further entry; stop
-	 * there rather than letting a+1 restart the scan from the top.
-	 */
-	for(a = 0; a >= 0 && a < ae; a = (*dict->nextoff)(a+1)) {
-		linelen = 0;
-		e = getentry(a);
-		Bprint(bout, "%ld\t", a);
-		linelen = 4;	/* only has to be approx right */
-		(*dict->printentry)(e, 'h');
-	}
-	exits(0);
-}
-
-/*
- * Read the entry that starts at offset b into a buffer that is reused
- * from call to call and return an Entry describing it.  The entry
- * runs to the next entry start, or to the end of the file when there
- * is no next entry.  The text is NUL-terminated at ans.end, which the
- * printentry routines that scan with strchr depend on.  If b lies at
- * or beyond the end of the file, an error is reported and the
- * returned Entry has start and end both nil.
- */
-Entry
-getentry(int32_t b)
-{
-	int32_t e, n, dtop;
-	static Entry ans;
-	static char *buf;	/* reusable storage for the entry text */
-	static int buflen = 0;	/* bytes currently allocated at buf */
-	char *nbuf;
-
-	e = (*dict->nextoff)(b+1);
-	ans.doff = b;
-	ans.start = 0;
-	ans.end = 0;
-	if(e < 0) {
-		dtop = Bseek(bdict, 0L, 2);
-		if(b >= dtop) {
-			/* return now: there is nothing to read for this offset */
-			err("couldn't seek to entry");
-			return ans;
-		}
-		e = dtop;
-	}
-	n = e-b;
-	if(n < 0)
-		n = 0;
-	if(n+1 > buflen) {
-		nbuf = realloc(buf, n+1);
-		if(!nbuf) {
-			err("out of memory");
-			exits("nomem");
-		}
-		buf = nbuf;
-		buflen = n+1;
-	}
-	Bseek(bdict, b, 0);
-	n = Bread(bdict, buf, n);
-	if(n < 0) {
-		/* a failed read must not leave ans.end in front of the buffer */
-		err("couldn't read entry");
-		n = 0;
-	}
-	ans.start = buf;
-	ans.end = buf + n;
-	*ans.end = 0;
-	return ans;
-}

+ 0 - 23
sys/src/cmd/dict/mkroget

@@ -1,23 +0,0 @@
-#!/bin/rc
-
-rtf2txt roget-body.rtf |
-	sed '
-		1,12d
-		/^100. /{
-			N
-			s/\n//
-			p
-		}
-		/^388a. /{
-			N
-			s/\n//
-			p
-		}
-	' > /lib/dict/roget
-
-mkindex -d roget | 
-	sort -u -t'	' +0f -1 +0 -1 +1n -2 |
-	sed '
-		s/[ 	]+$//g
-		s/ 	/	/g
-	' > /lib/dict/rogetindex

+ 0 - 333
sys/src/cmd/dict/movie.c

@@ -1,333 +0,0 @@
-/*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
-
-#include <u.h>
-#include <libc.h>
-#include <bio.h>
-#include "dict.h"
-
-/* Possible tags */
-enum {
-	BEG,	/* beginning of entry */
-	AB,	/* abstract */
-	AN,	/* database serial number */
-	AS,	/* author (one at a time) */
-	AU,	/* all authors */
-	AW,	/* award_awardee */
-	BW,	/* bw or c */
-	CA,	/* cast: character_actor */
-	CN,	/* cinematography */
-	CO,	/* country */
-	CR,	/* miscellaneous job_name */
-	DE,	/* topic keyword */
-	DR,	/* director */
-	ED,	/* editor */
-	MP,	/* MPAA rating (R, PG, etc.) */
-	NT,	/* note */
-	PR,	/* producer and for ...*/
-	PS,	/* producer (repeats info in PR) */
-	RA,	/* rating (letter) */
-	RD,	/* release date */
-	RT,	/* running time */
-	RV,	/* review citation */
-	ST,	/* production or release company (repeats info in PR) */
-	TI,	/* title[; original foreign title] */
-	TX,	/* paragraph of descriptive text */
-	VD,	/* video information (format_time_company; or "Not Avail.") */
-	NTAG	/* number of tags */
-};
-
-/*
- * Two-character line prefix for each tag, indexed by the tag enum
- * above.  mget looks a prefix up by index and compares its first two
- * characters with the start of each line.
- */
-
-static char *tagtab[] = {
-[BEG]	"$$",
-[AB]	"AB",
-[AN]	"AN",
-[AS]	"AS",
-[AU]	"AU",
-[AW]	"AW",
-[BW]	"BW",
-[CA]	"CA",
-[CN]	"CN",
-[CO]	"CO",
-[CR]	"CR",
-[DE]	"DE",
-[DR]	"DR",
-[ED]	"ED",
-[MP]	"MP",
-[NT]	"NT",
-[PR]	"PR",
-[PS]	"PS",
-[RA]	"RA",
-[RD]	"RD",
-[RT]	"RT",
-[RV]	"RV",
-[ST]	"ST",
-[TI]	"TI",
-[TX]	"TX",
-[VD]	"VD",
-};
-
-static char	*mget(int, char *, char *, char **);
-static void	moutall(int, char *, char *);
-static void	moutall2(int, char *, char *);
-
-/*
- * If the entry in [ps, pe) has a value for tag, append it to the
- * one-line summary: ", " first when *n is nonzero, then label (when
- * not nil), then the value.  Counts the item in *n.
- */
-static void
-msummary(int tag, char *label, char *ps, char *pe, int *n)
-{
-	char *p, *e;
-
-	p = mget(tag, ps, pe, &e);
-	if(!p)
-		return;
-	if(*n)
-		outchars(", ");
-	if(label)
-		outchars(label);
-	outpiece(p, e);
-	(*n)++;
-}
-
-/*
- * If the entry in [ps, pe) has a value for tag, print label followed
- * by the value and end the line.
- */
-static void
-mlabelled(int tag, char *label, char *ps, char *pe)
-{
-	char *p, *e;
-
-	p = mget(tag, ps, pe, &e);
-	if(!p)
-		return;
-	outchars(label);
-	outpiece(p, e);
-	outnl(1);
-}
-
-/*
- * Print the movie entry ent.
- *	cmd 'r': write the bytes of the entry unchanged
- *	cmd 'h': print the title (TI) only
- *	otherwise: title, a one-line summary, credits, cast, awards,
- *		notes, abstract and every TX paragraph
- */
-void
-movieprintentry(Entry ent, int cmd)
-{
-	char *p, *e, *ps, *pe, *pn;
-	int n;
-
-	ps = ent.start;
-	pe = ent.end;
-	if(cmd == 'r') {
-		Bwrite(bout, ps, pe-ps);
-		return;
-	}
-	p = mget(TI, ps, pe, &e);
-	if(p) {
-		outpiece(p, e);
-		outnl(0);
-	}
-	if(cmd == 'h')
-		return;
-	outnl(2);
-
-	/* one-line summary: release date, country, running time, rating, colour */
-	n = 0;
-	msummary(RD, "Released: ", ps, pe, &n);
-	msummary(CO, 0, ps, pe, &n);
-	msummary(RT, "Running time: ", ps, pe, &n);
-	msummary(MP, 0, ps, pe, &n);
-	p = mget(BW, ps, pe, &e);
-	if(p) {
-		if(n)
-			outchars(", ");
-		if(*p == 'c' || *p == 'C')
-			outchars("Color");
-		else
-			outchars("B&W");
-		n++;
-	}
-	if(n) {
-		outchar('.');
-		outnl(1);
-	}
-
-	/* credits, one per line */
-	mlabelled(VD, "Video: ", ps, pe);
-	if(mget(AU, ps, pe, &e)) {
-		outchars("By: ");
-		moutall2(AU, ps, pe);
-		outnl(1);
-	}
-	mlabelled(DR, "Director: ", ps, pe);
-	mlabelled(PR, "Producer: ", ps, pe);
-	mlabelled(CN, "Cinematography: ", ps, pe);
-	if(mget(CR, ps, pe, &e)) {
-		outchars("Other Credits: ");
-		moutall2(CR, ps, pe);
-	}
-	outnl(2);
-	if(mget(CA, ps, pe, &e)) {
-		outchars("Cast: ");
-		moutall2(CA, ps, pe);
-	}
-	outnl(2);
-	if(mget(AW, ps, pe, &e)) {
-		outchars("Awards: ");
-		moutall2(AW, ps, pe);
-		outnl(2);
-	}
-
-	/* free text: note, abstract, then each descriptive paragraph */
-	p = mget(NT, ps, pe, &e);
-	if(p) {
-		outpiece(p, e);
-		outnl(2);
-	}
-	p = mget(AB, ps, pe, &e);
-	if(p) {
-		outpiece(p, e);
-		outnl(2);
-	}
-	pn = ps;
-	n = 0;
-	while((p = mget(TX, pn, pe, &pn)) != 0) {
-		if(n++)
-			outnl(1);
-		outpiece(p, pn);
-	}
-	outnl(0);
-}
-
-/*
- * Return the offset of the first line at or after fromoff that begins
- * with "$$", or -1 if the seek fails or no such line is left.
- */
-int32_t
-movienextoff(int32_t fromoff)
-{
-	int32_t pos;
-	char *line;
-
-	pos = Bseek(bdict, fromoff, 0);
-	if(pos < 0)
-		return -1;
-	while((line = Brdline(bdict, '\n')) != 0)
-		if(line[0] == '$' && line[1] == '$')
-			return (Boffset(bdict)-Blinelen(bdict));
-	return -1;
-}
-
-/*
- * The movie dictionary has no pronunciation key; say so on bout.
- */
-void
-movieprintkey(void)
-{
-	Bprint(bout, "No key\n");
-}
-
-/*
- * Output every value of tag found between b and e, separating
- * successive values with ", ".
- */
-static void
-moutall(int tag, char *b, char *e)
-{
-	char *val, *next;
-	int first;
-
-	first = 1;
-	for(next = b; (val = mget(tag, next, e, &next)) != 0; first = 0) {
-		if(!first)
-			outchars(", ");
-		outpiece(val, next);
-	}
-}
-
-/*
- * Variant of moutall for values of the form
- *    field1_field2
- * Each one is written as 'field2 (field1)'.  The split is made at the
- * last underscore, because field1 may itself contain underscores.
- */
-static void
-moutall2(int tag, char *b, char *e)
-{
-	char *val, *next, *us, *q;
-	char *lead, *leadend, *paren, *parenend;
-	int first;
-
-	first = 1;
-	next = b;
-	while((val = mget(tag, next, e, &next)) != 0) {
-		if(!first)
-			outchars(", ");
-		first = 0;
-
-		/* look for the last underscore in [val, next) */
-		us = 0;
-		q = next;
-		while(q > val)
-			if(*--q == '_') {
-				us = q;
-				break;
-			}
-		if(!us) {
-			outpiece(val, next);
-			continue;
-		}
-
-		if(strncmp(us+1, "Himself", 7) == 0 ||
-		   strncmp(us+1, "Herself", 7) == 0) {
-			/*
-			 * Hack to fix cast list Himself/Herself:
-			 * keep the fields in their stored order
-			 */
-			lead = val;
-			leadend = us;
-			paren = us+1;
-			parenend = next;
-		} else {
-			lead = us+1;
-			leadend = next;
-			paren = val;
-			parenend = us;
-		}
-		outpiece(lead, leadend);
-		outchars(" (");
-		outpiece(paren, parenend);
-		outchar(')');
-	}
-}
-
-/*
- * Starting from b, find next line beginning with tagtab[tag].
- * Don't go past e, but assume *e==0.
- * Return pointer to beginning of value (after tag), and set
- * eptr to point at newline that ends the value
- *
- * Only lines that follow a newline at or after b are examined, so
- * text between b and the first newline is never matched.  The value
- * is taken to start three bytes into the matching line, and it runs
- * on through following lines that begin with a space.  Returns 0 for
- * an out-of-range tag or when no line matches; *eptr is left
- * untouched in that case.
- */
-static char *
-mget(int tag, char *b, char *e, char **eptr)
-{
-	char *p, *t, *ans;
-
-	if(tag < 0 || tag >= NTAG)
-		return 0;
-	t = tagtab[tag];
-	ans = 0;
-	for(p = b;;) {
-		p = strchr(p, '\n');
-		if(!p || ++p >= e) {
-			/* ran out of entry: a value in progress ends just before e */
-			if(ans)
-				*eptr = e-1;
-			break;
-		}
-		if(!ans) {
-			/* still searching: compare the two-character tag */
-			if(p[0] == t[0] && p[1] == t[1])
-				ans = p+3;
-		} else {
-			/* inside a value: a line not starting with a space ends it */
-			if(p[0] != ' ') {
-				*eptr = p-1;
-				break;
-			}
-		}
-	}
-	return ans;
-}

File diff suppressed because it is too large
+ 0 - 1434
sys/src/cmd/dict/oed.c


+ 0 - 235
sys/src/cmd/dict/pcollins.c

@@ -1,235 +0,0 @@
-/*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
-
-#include <u.h>
-#include <libc.h>
-#include <bio.h>
-#include "dict.h"
-
-/*
- * Routines for handling dictionaries in the "Paperback Collins"
- * format (with tags surrounded by >....<)
- */
-enum {
-	Buflen=1000,
-};
-
-/*
- * More special runes.  They are numbered from MULTIE+1 so that they
- * fall outside both the LIGS..LIGE and MULTI..MULTIE ranges that
- * pcollprintentry tests for before switching on these values.
- */
-enum {
-	B = MULTIE+1,	/* bold */
-	H,		/* headword start */
-	I,		/* italics */
-	Ps,		/* pronunciation start */
-	Pe,		/* pronunciation end */
-	R,		/* roman */
-	X,		/* headword end */
-};
-
-/*
- * Assoc tables must be sorted on first field
- *
- * Maps the text of a >tag< to a rune, a ligature code or one of the
- * special runes above; pcollprintentry looks tags up with lookassoc.
- * NOTE(review): "[" and "{" both map to Ps, but only "]" maps to Pe;
- * there is no "}" entry -- confirm whether one is needed.
- */
-
-static Assoc tagtab[] = {
-	{"AA",		L'Å'},
-	{"AC",		LACU},
-	{"B",		B},
-	{"CE",		LCED},
-	{"CI",		LFRN},
-	{"Di",		L'ı'},
-	{"EL",		L'-'},
-	{"GR",		LGRV},
-	{"H",		H},
-	{"I",		I},
-	{"OE",		L'Œ'},
-	{"R",		R},
-	{"TI",		LTIL},
-	{"UM",		LUML},
-	{"X",		X},
-	{"[",		Ps},
-	{"]",		Pe},
-	{"ac",		LACU},
-	{"ce",		LCED},
-	{"ci",		LFRN},
-	{"gr",		LGRV},
-	{"oe",		L'œ'},
-	{"supe",	L'e'},		/* should be raised */
-	{"supo",	L'o'},		/* should be raised */
-	{"ti",		LTIL},
-	{"um",		LUML},
-	{"{",		Ps},
-	{"~",		L'~'},
-	{"~~",		MTT},
-};
-
-/*
- * Translation of each 7-bit input byte to the rune to emit.
- * '>' (0x3e) opens a tag (TAGS) and '<' (0x3c) closes one (TAGE),
- * matching the >....< tag syntax of this format; newline (0x0a)
- * becomes a space and the other control characters and 0x7f emit
- * nothing.
- */
-static Rune normtab[128] = {
-	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
-/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-	NONE,	NONE,	L' ',	NONE,	NONE,	NONE,	NONE,	NONE,
-/*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-/*20*/	L' ',	L'!',	L'"',	L'#',	L'$',	L'%',	L'&',	L'\'',
-	L'(',	L')',	L'*',	L'+',	L',',	L'-',	L'.',	L'/',
-/*30*/  L'0',	L'1',	L'2',	L'3',	L'4',	L'5',	L'6',	L'7',
-	L'8',	L'9',	L':',	L';',	TAGE,	L'=',	TAGS,	L'?',
-/*40*/  L'@',	L'A',	L'B',	L'C',	L'D',	L'E',	L'F',	L'G',
-	L'H',	L'I',	L'J',	L'K',	L'L',	L'M',	L'N',	L'O',
-/*50*/	L'P',	L'Q',	L'R',	L'S',	L'T',	L'U',	L'V',	L'W',
-	L'X',	L'Y',	L'Z',	L'[',	L'\\',	L']',	L'^',	L'_',
-/*60*/	L'`',	L'a',	L'b',	L'c',	L'd',	L'e',	L'f',	L'g',
-	L'h',	L'i',	L'j',	L'k',	L'l',	L'm',	L'n',	L'o',
-/*70*/	L'p',	L'q',	L'r',	L's',	L't',	L'u',	L'v',	L'w',
-	L'x',	L'y',	L'z',	L'{',	L'|',	L'}',	L'~',	NONE,
-};
-
-static char *gettag(char *, char *);
-
-static Entry	curentry;
-static char	tag[Buflen];
-#define cursize (curentry.end-curentry.start)
-
-/*
- * Print entry e of a Paperback Collins dictionary.
- *	cmd 'r': copy the bytes of the entry out unchanged
- *	cmd 'h': print only what lies between the H and X tags
- *	otherwise: print the text with the tags interpreted
- * Ordinary runes are held back one position in rprev, so that an
- * accent tag following a letter can replace that letter with the
- * combined rune returned by liglookup.
- * NOTE(review): rprev is not written out after the loop, so the last
- * buffered rune of an entry appears to be dropped -- confirm whether
- * that is intended.
- * NOTE(review): cursize is a pointer difference but is printed with
- * %d in the debug message below -- confirm the widths agree.
- */
-void
-pcollprintentry(Entry e, int cmd)
-{
-	char *p, *pe;
-	int32_t r, rprev, t, rlig;
-	int saveoi;
-	Rune *transtab;
-
-	p = e.start;
-	pe = e.end;
-	transtab = normtab;
-	rprev = NONE;
-	changett(0, 0, 0);
-	curentry = e;
-	saveoi = 0;
-	/* for 'h', stay silent until the H tag switches output on */
-	if(cmd == 'h')
-		outinhibit = 1;
-	while(p < pe) {
-		if(cmd == 'r') {
-			outchar(*p++);
-			continue;
-		}
-		/* only the low 7 bits of the byte select the table slot */
-		r = transtab[(*p++)&0x7F];
-		if(r < NONE) {
-			/* Emit the rune, but buffer in case of ligature */
-			if(rprev != NONE)
-				outrune(rprev);
-			rprev = r;
-		} else if(r == TAGS) {
-			p = gettag(p, pe);
-			t = lookassoc(tagtab, asize(tagtab), tag);
-			/* presumably -1 means the tag is not in tagtab: skip it */
-			if(t == -1) {
-				if(debug && !outinhibit)
-					err("tag %ld %d %s",
-						e.doff, cursize, tag);
-				continue;
-			}
-			if(t < NONE) {
-				/* the tag stands for an ordinary rune */
-				if(rprev != NONE)
-					outrune(rprev);
-				rprev = t;
-			} else if(t >= LIGS && t < LIGE) {
-				/* handle possible ligature */
-				rlig = liglookup(t, rprev);
-				if(rlig != NONE)
-					rprev = rlig;	/* overwrite rprev */
-				else {
-					/* could print accent, but let's not */
-					if(rprev != NONE) outrune(rprev);
-					rprev = NONE;
-				}
-			} else if(t >= MULTI && t < MULTIE) {
-				/* the tag expands to a fixed run of runes */
-				if(rprev != NONE) {
-					outrune(rprev);
-					rprev = NONE;
-				}
-				outrunes(multitab[t-MULTI]);
-			} else {
-				/* a structural tag: flush, then act on it */
-				if(rprev != NONE) {
-					outrune(rprev);
-					rprev = NONE;
-				}
-				switch(t){
-				case H:
-					if(cmd == 'h')
-						outinhibit = 0;
-					else
-						outnl(0);
-					break;
-				case X:
-					if(cmd == 'h')
-						outinhibit = 1;
-					else
-						outchars(".  ");
-					break;
-				case Ps:
-					/* don't know enough of pron. key yet */
-					saveoi = outinhibit;
-					outinhibit = 1;
-					break;
-				case Pe:
-					outinhibit = saveoi;
-					break;
-				}
-			}
-		}
-	}
-	if(cmd == 'h')
-		outinhibit = 0;
-	outnl(0);
-}
-
-/*
- * Return the offset of the first line at or after fromoff that begins
- * with ">H<", or -1 if the seek fails or no such line is left.
- */
-int32_t
-pcollnextoff(int32_t fromoff)
-{
-	int32_t pos;
-	char *line;
-
-	pos = Bseek(bdict, fromoff, 0);
-	if(pos < 0)
-		return -1;
-	while((line = Brdline(bdict, '\n')) != 0)
-		if(line[0] == '>' && line[1] == 'H' && line[2] == '<')
-			return (Boffset(bdict)-Blinelen(bdict));
-	return -1;
-}
-
-/*
- * No pronunciation key is available for this dictionary; say so on bout.
- */
-void
-pcollprintkey(void)
-{
-	Bprint(bout, "No pronunciation key yet\n");
-}
-
-/*
- * f points just after '>'; fe points at end of entry.
- * Expect next characters from bin to match:
- *  [^ <]+<
- *     tag
- * Accumulate the tag in tag[].
- * Return pointer to after final '<'.
- *
- * At most Buflen-1 characters are collected and tag[] is always
- * NUL-terminated.  Nothing at or beyond fe is read: when the '>' was
- * the last byte of the entry the tag comes back empty, instead of the
- * scan running on past the end of the entry.
- */
-static char *
-gettag(char *f, char *fe)
-{
-	char *t;
-	int c, i;
-
-	t = tag;
-	i = Buflen;
-	while(--i > 0 && f < fe) {
-		c = *f++;
-		if(c == '<' || f == fe)
-			break;
-		*t++ = c;
-	}
-	*t = 0;
-	return f;
-}

+ 0 - 257
sys/src/cmd/dict/pcollinsg.c

@@ -1,257 +0,0 @@
-/*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
-
-#include <u.h>
-#include <libc.h>
-#include <bio.h>
-#include "dict.h"
-
-/*
- * Routines for handling dictionaries in the "Paperback Collins"
- * `German' format (with tags surrounded by \5⋯\6 and \xba⋯\xba)
- */
-
-/*
- *	\5⋯\6 escapes (fonts, mostly)
- *
- *	h	headword (helvetica 7 pt)
- *	c	clause (helvetica 7 pt)
- *	3	helvetica 7 pt
- *	4	helvetica 6.5 pt
- *	s	helvetica 8 pt
- *	x	helvetica 8 pt
- *	y	helvetica 5 pt
- *	m	helvetica 30 pt
- *	1	roman 6 pt
- *	9	roman 4.5 pt
- *	p	roman 7 pt
- *	q	roman 4.5 pt
- *	2	italic 6 pt
- *	7	italic 4.5 pt
- *	b	bold 6 pt
- *	a	`indent 0:4 left'
- *	k	`keep 9'
- *	l	`size 12'
- */
-
-enum {
-	IBASE=L'i',	/* dotless i */
-	Taglen=32,
-};
-
-static Rune intab[256] = {
-	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
-/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	TAGS,	TAGE,	NONE,
-	NONE,	NONE,	NONE,	NONE,	NONE,	L' ',	NONE,	NONE,
-/*10*/	NONE,	L'-',	L' ',	L' ',	NONE,	NONE,	NONE,	NONE,
-	L' ',	NONE,	NONE,	NONE,	L' ',	NONE,	NONE,	L'-',
-/*20*/	L' ',	L'!',	L'"',	L'#',	L'$',	L'%',	L'&',	L'\'',
-	L'(',	L')',	L'*',	L'+',	L',',	L'-',	L'.',	L'/',
-/*30*/  L'0',	L'1',	L'2',	L'3',	L'4',	L'5',	L'6',	L'7',
-	L'8',	L'9',	L':',	L';',	L'<',	L'=',	L'>',	L'?',
-/*40*/  L'@',	L'A',	L'B',	L'C',	L'D',	L'E',	L'F',	L'G',
-	L'H',	L'I',	L'J',	L'K',	L'L',	L'M',	L'N',	L'O',
-/*50*/	L'P',	L'Q',	L'R',	L'S',	L'T',	L'U',	L'V',	L'W',
-	L'X',	L'Y',	L'Z',	L'[',	L'\\',	L']',	L'^',	L'_',
-/*60*/	L'`',	L'a',	L'b',	L'c',	L'd',	L'e',	L'f',	L'g',
-	L'h',	L'i',	L'j',	L'k',	L'l',	L'm',	L'n',	L'o',
-/*70*/	L'p',	L'q',	L'r',	L's',	L't',	L'u',	L'v',	L'w',
-	L'x',	L'y',	L'z',	L'{',	L'|',	L'}',	L'~',	NONE,
-/*80*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-	NONE,	NONE,	L' ',	NONE,	NONE,	NONE,	NONE,	NONE,
-/*90*/	L'ß',	L'æ',	NONE,	MOE,	NONE,	NONE,	NONE,	L'ø',
-	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-/*A0*/	NONE,	NONE,	L'"',	L'£',	NONE,	NONE,	NONE,	NONE,
-	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-/*B0*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	L'~',
-	NONE,	IBASE,	SPCS,	NONE,	NONE,	NONE,	NONE,	NONE,
-/*C0*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-/*D0*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-/*E0*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-/*F0*/	L' ',	L' ',	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
-};
-
-/*
- * Numeric special-character codes and the runes printed for them;
- * pcollgprintentry looks up the number found in a special-character
- * escape with looknassoc.  Keys are listed in ascending order.
- * NOTE(review): the fromemf notes list code 347 as theta and 384 as
- * eng, while the runes here are ɵ and ɳ -- confirm which is meant.
- */
-static Nassoc numtab[] = {
-	{1,	L'+'},
-	{4,	L'='},
-	{7,	L'°'},
-	{11,	L'≈'},
-	{69,	L'♦'},
-	{114,	L'®'},
-	{340,	L'ɛ'},
-	{341,	L'ɔ'},
-	{342,	L'ʌ'},
-	{343,	L'ə'},
-	{345,	L'ʒ'},
-	{346,	L'ʃ'},
-	{347,	L'ɵ'},
-	{348,	L'ʊ'},
-	{349,	L'ˈ'},
-	{351,	L'ɪ'},
-	{352,	L'ɜ'},
-	{354,	L'ɑ'},
-	{355,	L'~'},
-	{356,	L'ɒ'},
-	{384,	L'ɳ'},
-	{445,	L'ð'},	/* BUG -- should be script eth */
-};
-
-static Nassoc overtab[] = {
-	{L',',	LCED},
-	{L'/',	LACU},
-	{L':',	LUML},
-	{L'\\',	LGRV},
-	{L'^',	LFRN},
-	{L'~',	LTIL},
-};
-
-static uint8_t *reach(uint8_t*, int);
-
-static Entry	curentry;
-static char	tag[Taglen];
-
-void
-pcollgprintentry(Entry e, int cmd)
-{
-	uint8_t *p, *pe;
-	int r, rprev = NONE, rx, over = 0, font;
-	char buf[16];
-
-	p = (uint8_t *)e.start;
-	pe = (uint8_t *)e.end;
-	curentry = e;
-	if(cmd == 'h')
-		outinhibit = 1;
-	while(p < pe){
-		if(cmd == 'r'){
-			outchar(*p++);
-			continue;
-		}
-		switch(r = intab[*p++]){
-		case TAGS:
-			if(rprev != NONE){
-				outrune(rprev);
-				rprev = NONE;
-			}
-			p = reach(p, 0x06);
-			font = tag[0];
-			if(cmd == 'h')
-				outinhibit = (font != 'h');
-			break;
-
-		case TAGE:	/* an extra one */
-			break;
-
-		case SPCS:
-			p = reach(p, 0xba);
-			r = looknassoc(numtab, asize(numtab), strtol(tag,0,0));
-			if(r < 0){
-				if(rprev != NONE){
-					outrune(rprev);
-					rprev = NONE;
-				}
-				sprint(buf, "\\N'%s'", tag);
-				outchars(buf);
-				break;