1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738 |
- /*
- * This file is part of the UCB release of Plan 9. It is subject to the license
- * terms in the LICENSE file found in the top-level directory of this
- * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
- * part of the UCB release of Plan 9, including this file, may be copied,
- * modified, propagated, or distributed except according to the terms contained
- * in the LICENSE file.
- */
- #include <u.h>
- #include <libc.h>
- #include <bio.h>
- /*
- bugs:
- 00/ff for end of file can conflict with 00/ff characters
- */
- static Rune ljan[] = {'J','A','N'};
- static Rune lfeb[] = {'F','E','B'};
- static Rune lmar[] = {'M','A','R'};
- static Rune lapr[] = {'A','P','R'};
- static Rune lmay[] = {'M','A','Y'};
- static Rune ljun[] = {'J','U','N'};
- static Rune ljul[] = {'J','U','L'};
- static Rune laug[] = {'A','U','G'};
- static Rune lsep[] = {'S','E','P'};
- static Rune loct[] = {'O','C','T'};
- static Rune lnov[] = {'N','O','V'};
- static Rune ldec[] = {'D','E','C'};
- enum
- {
- Nline = 100000, /* default max number of lines saved in memory */
- Nmerge = 10, /* max number of temporary files merged */
- Nfield = 20, /* max number of argument fields */
- Bflag = 1<<0, /* flags per field */
- B1flag = 1<<1,
- Dflag = 1<<2,
- Fflag = 1<<3,
- Gflag = 1<<4,
- Iflag = 1<<5,
- Mflag = 1<<6,
- Nflag = 1<<7,
- Rflag = 1<<8,
- Wflag = 1<<9,
- NSstart = 0, /* states for number to key decoding */
- NSsign,
- NSzero,
- NSdigit,
- NSpoint,
- NSfract,
- NSzerofract,
- NSexp,
- NSexpsign,
- NSexpdigit,
- };
- typedef struct Line Line;
- typedef struct Key Key;
- typedef struct Merge Merge;
- typedef struct Field Field;
- struct Line
- {
- Key* key;
- int llen; /* always >= 1 */
- uint8_t line[1]; /* always ends in '\n' */
- };
- struct Merge
- {
- Key* key; /* copy of line->key so (Line*) looks like (Merge*) */
- Line* line; /* line at the head of a merged temp file */
- int fd; /* file descriptor */
- Biobuf b; /* iobuf for reading a temp file */
- };
- struct Key
- {
- int klen;
- uint8_t key[1];
- };
- struct Field
- {
- int beg1;
- int beg2;
- int end1;
- int end2;
- int32_t flags;
- uint8_t mapto[1+255];
- void (*dokey)(Key*, uint8_t*, uint8_t*, Field*);
- };
- struct args
- {
- char* ofile;
- char* tname;
- Rune tabchar;
- char cflag;
- char uflag;
- char vflag;
- int nfield;
- int nfile;
- Field field[Nfield];
- Line** linep;
- int32_t nline; /* number of lines in this temp file */
- int32_t lineno; /* overall ordinal for -s option */
- int ntemp;
- int32_t mline; /* max lines per file */
- } args;
- extern Rune* month[12];
- void buildkey(Line*);
- void doargs(int, char*[]);
- void dofield(char*, int*, int*, int, int);
- void dofile(Biobuf*);
- void dokey_(Key*, uint8_t*, uint8_t*, Field*);
- void dokey_dfi(Key*, uint8_t*, uint8_t*, Field*);
- void dokey_gn(Key*, uint8_t*, uint8_t*, Field*);
- void dokey_m(Key*, uint8_t*, uint8_t*, Field*);
- void dokey_r(Key*, uint8_t*, uint8_t*, Field*);
- void done(char*);
- int kcmp(Key*, Key*);
- void makemapd(Field*);
- void makemapm(Field*);
- void mergefiles(int, int, Biobuf*);
- void mergeout(Biobuf*);
- void newfield(void);
- Line* newline(Biobuf*);
- void nomem(void);
- void notifyf(void *c, char*);
- void printargs(void);
- void printout(Biobuf*);
- void setfield(int, int);
- uint8_t* skip(uint8_t*, int, int, int, int);
- void sort4(void *c, uint32_t);
- char* tempfile(int);
- void tempout(void);
- void lineout(Biobuf*, Line*);
- void
- main(int argc, char *argv[])
- {
- int i, f;
- char *s;
- Biobuf bbuf;
- notify(notifyf); /**/
- doargs(argc, argv);
- if(args.vflag)
- printargs();
- for(i=1; i<argc; i++) {
- s = argv[i];
- if(s == 0)
- continue;
- if(strcmp(s, "-") == 0) {
- Binit(&bbuf, 0, OREAD);
- dofile(&bbuf);
- Bterm(&bbuf);
- continue;
- }
- f = open(s, OREAD);
- if(f < 0) {
- fprint(2, "sort: open %s: %r\n", s);
- done("open");
- }
- Binit(&bbuf, f, OREAD);
- dofile(&bbuf);
- Bterm(&bbuf);
- close(f);
- }
- if(args.nfile == 0) {
- Binit(&bbuf, 0, OREAD);
- dofile(&bbuf);
- Bterm(&bbuf);
- }
- if(args.cflag)
- done(0);
- if(args.vflag)
- fprint(2, "=========\n");
- f = 1;
- if(args.ofile) {
- f = create(args.ofile, OWRITE, 0666);
- if(f < 0) {
- fprint(2, "sort: create %s: %r\n", args.ofile);
- done("create");
- }
- }
- Binit(&bbuf, f, OWRITE);
- if(args.ntemp) {
- tempout();
- mergeout(&bbuf);
- } else {
- printout(&bbuf);
- }
- Bterm(&bbuf);
- done(0);
- }
- void
- dofile(Biobuf *b)
- {
- Line *l, *ol;
- int n;
- if(args.cflag) {
- ol = newline(b);
- if(ol == 0)
- return;
- for(;;) {
- l = newline(b);
- if(l == 0)
- break;
- n = kcmp(ol->key, l->key);
- if(n > 0 || (n == 0 && args.uflag)) {
- fprint(2, "sort: -c file not in sort\n"); /**/
- done("order");
- }
- free(ol->key);
- free(ol);
- ol = l;
- }
- return;
- }
- if(args.linep == 0) {
- args.linep = malloc(args.mline * sizeof(args.linep));
- if(args.linep == 0)
- nomem();
- }
- for(;;) {
- l = newline(b);
- if(l == 0)
- break;
- if(args.nline >= args.mline)
- tempout();
- args.linep[args.nline] = l;
- args.nline++;
- args.lineno++;
- }
- }
- void
- notifyf(void *c, char *s)
- {
- if(strcmp(s, "interrupt") == 0)
- done(0);
- if(strcmp(s, "hangup") == 0)
- done(0);
- if(strcmp(s, "kill") == 0)
- done(0);
- if(strncmp(s, "sys: write on closed pipe", 25) == 0)
- done(0);
- fprint(2, "sort: note: %s\n", s);
- abort();
- }
- Line*
- newline(Biobuf *b)
- {
- Line *l;
- char *p;
- int n, c;
- p = Brdline(b, '\n');
- n = Blinelen(b);
- if(p == 0) {
- if(n == 0)
- return 0;
- l = 0;
- for(n=0;;) {
- if((n & 31) == 0) {
- l = realloc(l, sizeof(Line) +
- (n+31)*sizeof(l->line[0]));
- if(l == 0)
- nomem();
- }
- c = Bgetc(b);
- if(c < 0) {
- fprint(2, "sort: newline added\n");
- c = '\n';
- }
- l->line[n++] = c;
- if(c == '\n')
- break;
- }
- l->llen = n;
- buildkey(l);
- return l;
- }
- l = malloc(sizeof(Line) +
- (n-1)*sizeof(l->line[0]));
- if(l == 0)
- nomem();
- l->llen = n;
- memmove(l->line, p, n);
- buildkey(l);
- return l;
- }
- void
- lineout(Biobuf *b, Line *l)
- {
- int n, m;
- n = l->llen;
- m = Bwrite(b, l->line, n);
- if(n != m)
- exits("write");
- }
- void
- tempout(void)
- {
- int32_t n;
- Line **lp, *l;
- char *tf;
- int f;
- Biobuf tb;
- sort4(args.linep, args.nline);
- tf = tempfile(args.ntemp);
- args.ntemp++;
- f = create(tf, OWRITE, 0666);
- if(f < 0) {
- fprint(2, "sort: create %s: %r\n", tf);
- done("create");
- }
- Binit(&tb, f, OWRITE);
- lp = args.linep;
- for(n=args.nline; n>0; n--) {
- l = *lp++;
- lineout(&tb, l);
- free(l->key);
- free(l);
- }
- args.nline = 0;
- Bterm(&tb);
- close(f);
- }
- void
- done(char *xs)
- {
- int i;
- for(i=0; i<args.ntemp; i++)
- remove(tempfile(i));
- exits(xs);
- }
- void
- nomem(void)
- {
- fprint(2, "sort: out of memory\n");
- done("mem");
- }
- char*
- tempfile(int n)
- {
- static char file[100];
- static uint pid;
- char *dir;
- dir = "/tmp";
- if(args.tname)
- dir = args.tname;
- if(strlen(dir) >= nelem(file)-20) {
- fprint(2, "temp file directory name is too int32_t: %s\n", dir);
- done("tdir");
- }
- if(pid == 0) {
- pid = getpid();
- if(pid == 0) {
- pid = time(0);
- if(pid == 0)
- pid = 1;
- }
- }
- sprint(file, "%s/sort.%.4d.%.4d", dir, pid%10000, n);
- return file;
- }
- void
- mergeout(Biobuf *b)
- {
- int n, i, f;
- char *tf;
- Biobuf tb;
- for(i=0; i<args.ntemp; i+=n) {
- n = args.ntemp - i;
- if(n > Nmerge) {
- tf = tempfile(args.ntemp);
- args.ntemp++;
- f = create(tf, OWRITE, 0666);
- if(f < 0) {
- fprint(2, "sort: create %s: %r\n", tf);
- done("create");
- }
- Binit(&tb, f, OWRITE);
- n = Nmerge;
- mergefiles(i, n, &tb);
- Bterm(&tb);
- close(f);
- } else
- mergefiles(i, n, b);
- }
- }
- void
- mergefiles(int t, int n, Biobuf *b)
- {
- Merge *m, *mp, **mmp;
- Key *ok;
- Line *l;
- char *tf;
- int i, f, nn;
- mmp = malloc(n*sizeof(*mmp));
- mp = malloc(n*sizeof(*mp));
- if(mmp == 0 || mp == 0)
- nomem();
- nn = 0;
- m = mp;
- for(i=0; i<n; i++,m++) {
- tf = tempfile(t+i);
- f = open(tf, OREAD);
- if(f < 0) {
- fprint(2, "sort: reopen %s: %r\n", tf);
- done("open");
- }
- m->fd = f;
- Binit(&m->b, f, OREAD);
- mmp[nn] = m;
- l = newline(&m->b);
- if(l == 0)
- continue;
- nn++;
- m->line = l;
- m->key = l->key;
- }
- ok = 0;
- for(;;) {
- sort4(mmp, nn);
- m = *mmp;
- if(nn == 0)
- break;
- for(;;) {
- l = m->line;
- if(args.uflag && ok && kcmp(ok, l->key) == 0) {
- free(l->key);
- free(l);
- } else {
- lineout(b, l);
- if(ok)
- free(ok);
- ok = l->key;
- free(l);
- }
- l = newline(&m->b);
- if(l == 0) {
- nn--;
- mmp[0] = mmp[nn];
- break;
- }
- m->line = l;
- m->key = l->key;
- if(nn > 1 && kcmp(mmp[0]->key, mmp[1]->key) > 0)
- break;
- }
- }
- if(ok)
- free(ok);
- m = mp;
- for(i=0; i<n; i++,m++) {
- Bterm(&m->b);
- close(m->fd);
- }
- free(mp);
- free(mmp);
- }
- int
- kcmp(Key *ka, Key *kb)
- {
- int n, m;
- /*
- * set n to length of smaller key
- */
- n = ka->klen;
- m = kb->klen;
- if(n > m)
- n = m;
- return memcmp(ka->key, kb->key, n);
- }
- void
- printout(Biobuf *b)
- {
- int32_t n;
- Line **lp, *l;
- Key *ok;
- sort4(args.linep, args.nline);
- lp = args.linep;
- ok = 0;
- for(n=args.nline; n>0; n--) {
- l = *lp++;
- if(args.uflag && ok && kcmp(ok, l->key) == 0)
- continue;
- lineout(b, l);
- ok = l->key;
- }
- }
- void
- setfield(int n, int c)
- {
- Field *f;
- f = &args.field[n];
- switch(c) {
- default:
- fprint(2, "sort: unknown option: field.%C\n", c);
- done("option");
- case 'b': /* skip blanks */
- f->flags |= Bflag;
- break;
- case 'd': /* directory order */
- f->flags |= Dflag;
- break;
- case 'f': /* fold case */
- f->flags |= Fflag;
- break;
- case 'g': /* floating point -n case */
- f->flags |= Gflag;
- break;
- case 'i': /* ignore non-ascii */
- f->flags |= Iflag;
- break;
- case 'M': /* month */
- f->flags |= Mflag;
- break;
- case 'n': /* numbers */
- f->flags |= Nflag;
- break;
- case 'r': /* reverse */
- f->flags |= Rflag;
- break;
- case 'w': /* ignore white */
- f->flags |= Wflag;
- break;
- }
- }
- void
- dofield(char *s, int *n1, int *n2, int off1, int off2)
- {
- int c, n;
- c = *s++;
- if(c >= '0' && c <= '9') {
- n = 0;
- while(c >= '0' && c <= '9') {
- n = n*10 + (c-'0');
- c = *s++;
- }
- n -= off1; /* posix committee: rot in hell */
- if(n < 0) {
- fprint(2, "sort: field offset must be positive\n");
- done("option");
- }
- *n1 = n;
- }
- if(c == '.') {
- c = *s++;
- if(c >= '0' && c <= '9') {
- n = 0;
- while(c >= '0' && c <= '9') {
- n = n*10 + (c-'0');
- c = *s++;
- }
- n -= off2;
- if(n < 0) {
- fprint(2, "sort: character offset must be positive\n");
- done("option");
- }
- *n2 = n;
- }
- }
- while(c != 0) {
- setfield(args.nfield, c);
- c = *s++;
- }
- }
- void
- printargs(void)
- {
- int i, n;
- Field *f;
- char *prefix;
- fprint(2, "sort");
- for(i=0; i<=args.nfield; i++) {
- f = &args.field[i];
- prefix = " -";
- if(i) {
- n = f->beg1;
- if(n >= 0)
- fprint(2, " +%d", n);
- else
- fprint(2, " +*");
- n = f->beg2;
- if(n >= 0)
- fprint(2, ".%d", n);
- else
- fprint(2, ".*");
- if(f->flags & B1flag)
- fprint(2, "b");
- n = f->end1;
- if(n >= 0)
- fprint(2, " -%d", n);
- else
- fprint(2, " -*");
- n = f->end2;
- if(n >= 0)
- fprint(2, ".%d", n);
- else
- fprint(2, ".*");
- prefix = "";
- }
- if(f->flags & Bflag)
- fprint(2, "%sb", prefix);
- if(f->flags & Dflag)
- fprint(2, "%sd", prefix);
- if(f->flags & Fflag)
- fprint(2, "%sf", prefix);
- if(f->flags & Gflag)
- fprint(2, "%sg", prefix);
- if(f->flags & Iflag)
- fprint(2, "%si", prefix);
- if(f->flags & Mflag)
- fprint(2, "%sM", prefix);
- if(f->flags & Nflag)
- fprint(2, "%sn", prefix);
- if(f->flags & Rflag)
- fprint(2, "%sr", prefix);
- if(f->flags & Wflag)
- fprint(2, "%sw", prefix);
- }
- if(args.cflag)
- fprint(2, " -c");
- if(args.uflag)
- fprint(2, " -u");
- if(args.ofile)
- fprint(2, " -o %s", args.ofile);
- if(args.mline != Nline)
- fprint(2, " -l %ld", args.mline);
- fprint(2, "\n");
- }
- void
- newfield(void)
- {
- int n;
- Field *f;
- n = args.nfield + 1;
- if(n >= Nfield) {
- fprint(2, "sort: too many fields specified\n");
- done("option");
- }
- args.nfield = n;
- f = &args.field[n];
- f->beg1 = -1;
- f->beg2 = -1;
- f->end1 = -1;
- f->end2 = -1;
- }
- void
- doargs(int argc, char *argv[])
- {
- int i, c, hadplus;
- char *s, *p, *q;
- Field *f;
- hadplus = 0;
- args.mline = Nline;
- for(i=1; i<argc; i++) {
- s = argv[i];
- c = *s++;
- if(c == '-') {
- c = *s;
- if(c == 0) /* forced end of arg marker */
- break;
- argv[i] = 0; /* clobber args processed */
- if(c == '.' || (c >= '0' && c <= '9')) {
- if(!hadplus)
- newfield();
- f = &args.field[args.nfield];
- dofield(s, &f->end1, &f->end2, 0, 0);
- hadplus = 0;
- continue;
- }
- while(c = *s++)
- switch(c) {
- case '-': /* end of options */
- i = argc;
- continue;
- case 'T': /* temp directory */
- if(*s == 0) {
- i++;
- if(i < argc) {
- args.tname = argv[i];
- argv[i] = 0;
- }
- } else
- args.tname = s;
- s = strchr(s, 0);
- break;
- case 'o': /* output file */
- if(*s == 0) {
- i++;
- if(i < argc) {
- args.ofile = argv[i];
- argv[i] = 0;
- }
- } else
- args.ofile = s;
- s = strchr(s, 0);
- break;
- case 'k': /* posix key (what were they thinking?) */
- p = 0;
- if(*s == 0) {
- i++;
- if(i < argc) {
- p = argv[i];
- argv[i] = 0;
- }
- } else
- p = s;
- s = strchr(s, 0);
- if(p == 0)
- break;
- newfield();
- q = strchr(p, ',');
- if(q)
- *q++ = 0;
- f = &args.field[args.nfield];
- dofield(p, &f->beg1, &f->beg2, 1, 1);
- if(f->flags & Bflag) {
- f->flags |= B1flag;
- f->flags &= ~Bflag;
- }
- if(q) {
- dofield(q, &f->end1, &f->end2, 1, 0);
- if(f->end2 <= 0)
- f->end1++;
- }
- hadplus = 0;
- break;
- case 't': /* tab character */
- if(*s == 0) {
- i++;
- if(i < argc) {
- chartorune(&args.tabchar, argv[i]);
- argv[i] = 0;
- }
- } else
- s += chartorune(&args.tabchar, s);
- if(args.tabchar == '\n') {
- fprint(2, "aw come on, rob\n");
- done("rob");
- }
- break;
- case 'c': /* check order */
- args.cflag = 1;
- break;
- case 'u': /* unique */
- args.uflag = 1;
- break;
- case 'v': /* debugging noise */
- args.vflag = 1;
- break;
- case 'l':
- if(*s == 0) {
- i++;
- if(i < argc) {
- args.mline = atol(argv[i]);
- argv[i] = 0;
- }
- } else
- args.mline = atol(s);
- s = strchr(s, 0);
- break;
- case 'M': /* month */
- case 'b': /* skip blanks */
- case 'd': /* directory order */
- case 'f': /* fold case */
- case 'g': /* floating numbers */
- case 'i': /* ignore non-ascii */
- case 'n': /* numbers */
- case 'r': /* reverse */
- case 'w': /* ignore white */
- if(args.nfield > 0)
- fprint(2, "sort: global field set after -k\n");
- setfield(0, c);
- break;
- case 'm':
- /* option m silently ignored but required by posix */
- break;
- default:
- fprint(2, "sort: unknown option: -%C\n", c);
- done("option");
- }
- continue;
- }
- if(c == '+') {
- argv[i] = 0; /* clobber args processed */
- c = *s;
- if(c == '.' || (c >= '0' && c <= '9')) {
- newfield();
- f = &args.field[args.nfield];
- dofield(s, &f->beg1, &f->beg2, 0, 0);
- if(f->flags & Bflag) {
- f->flags |= B1flag;
- f->flags &= ~Bflag;
- }
- hadplus = 1;
- continue;
- }
- fprint(2, "sort: unknown option: +%C\n", c);
- done("option");
- }
- args.nfile++;
- }
- for(i=0; i<=args.nfield; i++) {
- f = &args.field[i];
- /*
- * global options apply to fields that
- * specify no options
- */
- if(f->flags == 0) {
- f->flags = args.field[0].flags;
- if(args.field[0].flags & Bflag)
- f->flags |= B1flag;
- }
- /*
- * build buildkey specification
- */
- switch(f->flags & ~(Bflag|B1flag)) {
- default:
- fprint(2, "sort: illegal combination of flags: %lx\n", f->flags);
- done("option");
- case 0:
- f->dokey = dokey_;
- break;
- case Rflag:
- f->dokey = dokey_r;
- break;
- case Gflag:
- case Nflag:
- case Gflag|Nflag:
- case Gflag|Rflag:
- case Nflag|Rflag:
- case Gflag|Nflag|Rflag:
- f->dokey = dokey_gn;
- break;
- case Mflag:
- case Mflag|Rflag:
- f->dokey = dokey_m;
- makemapm(f);
- break;
- case Dflag:
- case Dflag|Fflag:
- case Dflag|Fflag|Iflag:
- case Dflag|Fflag|Iflag|Rflag:
- case Dflag|Fflag|Iflag|Rflag|Wflag:
- case Dflag|Fflag|Iflag|Wflag:
- case Dflag|Fflag|Rflag:
- case Dflag|Fflag|Rflag|Wflag:
- case Dflag|Fflag|Wflag:
- case Dflag|Iflag:
- case Dflag|Iflag|Rflag:
- case Dflag|Iflag|Rflag|Wflag:
- case Dflag|Iflag|Wflag:
- case Dflag|Rflag:
- case Dflag|Rflag|Wflag:
- case Dflag|Wflag:
- case Fflag:
- case Fflag|Iflag:
- case Fflag|Iflag|Rflag:
- case Fflag|Iflag|Rflag|Wflag:
- case Fflag|Iflag|Wflag:
- case Fflag|Rflag:
- case Fflag|Rflag|Wflag:
- case Fflag|Wflag:
- case Iflag:
- case Iflag|Rflag:
- case Iflag|Rflag|Wflag:
- case Iflag|Wflag:
- case Wflag:
- f->dokey = dokey_dfi;
- makemapd(f);
- break;
- }
- }
- /*
- * random spot checks
- */
- if(args.nfile > 1 && args.cflag) {
- fprint(2, "sort: -c can have at most one input file\n");
- done("option");
- }
- return;
- }
- uint8_t*
- skip(uint8_t *l, int n1, int n2, int bflag, int endfield)
- {
- int i, c, tc;
- Rune r;
- if(endfield && n1 < 0)
- return 0;
- c = *l++;
- tc = args.tabchar;
- if(tc) {
- if(tc < Runeself) {
- for(i=n1; i>0; i--) {
- while(c != tc) {
- if(c == '\n')
- return 0;
- c = *l++;
- }
- if(!(endfield && i == 1))
- c = *l++;
- }
- } else {
- l--;
- l += chartorune(&r, (char*)l);
- for(i=n1; i>0; i--) {
- while(r != tc) {
- if(r == '\n')
- return 0;
- l += chartorune(&r, (char*)l);
- }
- if(!(endfield && i == 1))
- l += chartorune(&r, (char*)l);
- }
- c = r;
- }
- } else {
- for(i=n1; i>0; i--) {
- while(c == ' ' || c == '\t')
- c = *l++;
- while(c != ' ' && c != '\t') {
- if(c == '\n')
- return 0;
- c = *l++;
- }
- }
- }
- if(bflag)
- while(c == ' ' || c == '\t')
- c = *l++;
- l--;
- for(i=n2; i>0; i--) {
- c = *l;
- if(c < Runeself) {
- if(c == '\n')
- return 0;
- l++;
- continue;
- }
- l += chartorune(&r, (char*)l);
- }
- return l;
- }
- void
- dokey_gn(Key *k, uint8_t *lp, uint8_t *lpe, Field *f)
- {
- uint8_t *kp;
- int c, cl, dp;
- int state, nzero, exp, expsign, rflag;
- cl = k->klen + 3;
- kp = k->key + cl; /* skip place for sign, exponent[2] */
- nzero = 0; /* number of trailing zeros */
- exp = 0; /* value of the exponent */
- expsign = 0; /* sign of the exponent */
- dp = 0x4040; /* location of decimal point */
- rflag = f->flags&Rflag; /* xor of rflag and - sign */
- state = NSstart;
- for(;; lp++) {
- if(lp >= lpe)
- break;
- c = *lp;
- if(c == ' ' || c == '\t') {
- switch(state) {
- case NSstart:
- case NSsign:
- continue;
- }
- break;
- }
- if(c == '+' || c == '-') {
- switch(state) {
- case NSstart:
- state = NSsign;
- if(c == '-')
- rflag = !rflag;
- continue;
- case NSexp:
- state = NSexpsign;
- if(c == '-')
- expsign = 1;
- continue;
- }
- break;
- }
- if(c == '0') {
- switch(state) {
- case NSdigit:
- if(rflag)
- c = ~c;
- *kp++ = c;
- cl++;
- nzero++;
- dp++;
- state = NSdigit;
- continue;
- case NSfract:
- if(rflag)
- c = ~c;
- *kp++ = c;
- cl++;
- nzero++;
- state = NSfract;
- continue;
- case NSstart:
- case NSsign:
- case NSzero:
- state = NSzero;
- continue;
- case NSzerofract:
- case NSpoint:
- dp--;
- state = NSzerofract;
- continue;
- case NSexpsign:
- case NSexp:
- case NSexpdigit:
- exp = exp*10 + (c - '0');
- state = NSexpdigit;
- continue;
- }
- break;
- }
- if(c >= '1' && c <= '9') {
- switch(state) {
- case NSzero:
- case NSstart:
- case NSsign:
- case NSdigit:
- if(rflag)
- c = ~c;
- *kp++ = c;
- cl++;
- nzero = 0;
- dp++;
- state = NSdigit;
- continue;
- case NSzerofract:
- case NSpoint:
- case NSfract:
- if(rflag)
- c = ~c;
- *kp++ = c;
- cl++;
- nzero = 0;
- state = NSfract;
- continue;
- case NSexpsign:
- case NSexp:
- case NSexpdigit:
- exp = exp*10 + (c - '0');
- state = NSexpdigit;
- continue;
- }
- break;
- }
- if(c == '.') {
- switch(state) {
- case NSstart:
- case NSsign:
- state = NSpoint;
- continue;
- case NSzero:
- state = NSzerofract;
- continue;
- case NSdigit:
- state = NSfract;
- continue;
- }
- break;
- }
- if((f->flags & Gflag) && (c == 'e' || c == 'E')) {
- switch(state) {
- case NSdigit:
- case NSfract:
- state = NSexp;
- continue;
- }
- break;
- }
- break;
- }
- switch(state) {
- /*
- * result is zero
- */
- case NSstart:
- case NSsign:
- case NSzero:
- case NSzerofract:
- case NSpoint:
- kp = k->key + k->klen;
- k->klen += 2;
- kp[0] = 0x20; /* between + and - */
- kp[1] = 0;
- return;
- /*
- * result has exponent
- */
- case NSexpsign:
- case NSexp:
- case NSexpdigit:
- if(expsign)
- exp = -exp;
- dp += exp;
- /*
- * result is fixed point number
- */
- case NSdigit:
- case NSfract:
- kp -= nzero;
- cl -= nzero;
- break;
- }
- /*
- * end of number
- */
- c = 0;
- if(rflag)
- c = ~c;
- *kp = c;
- /*
- * sign and exponent
- */
- c = 0x30;
- if(rflag) {
- c = 0x10;
- dp = ~dp;
- }
- kp = k->key + k->klen;
- kp[0] = c;
- kp[1] = (dp >> 8);
- kp[2] = dp;
- k->klen = cl+1;
- }
- void
- dokey_m(Key *k, uint8_t *lp, uint8_t *lpe, Field *f)
- {
- uint8_t *kp;
- Rune r, place[3];
- int c, cl, pc;
- int rflag;
- rflag = f->flags&Rflag;
- pc = 0;
- cl = k->klen;
- kp = k->key + cl;
- for(;;) {
- /*
- * get the character
- */
- if(lp >= lpe)
- break;
- c = *lp;
- if(c >= Runeself) {
- lp += chartorune(&r, (char*)lp);
- c = r;
- } else
- lp++;
- if(c < nelem(f->mapto)) {
- c = f->mapto[c];
- if(c == 0)
- continue;
- }
- place[pc++] = c;
- if(pc < 3)
- continue;
- for(c=11; c>=0; c--)
- if(memcmp(month[c], place, sizeof(place)) == 0)
- break;
- c += 10;
- if(rflag)
- c = ~c;
- *kp++ = c;
- cl++;
- break;
- }
- c = 0;
- if(rflag)
- c = ~c;
- *kp = c;
- k->klen = cl+1;
- }
- void
- dokey_dfi(Key *k, uint8_t *lp, uint8_t *lpe, Field *f)
- {
- uint8_t *kp;
- Rune r;
- int c, cl, n, rflag;
- cl = k->klen;
- kp = k->key + cl;
- rflag = f->flags & Rflag;
- for(;;) {
- /*
- * get the character
- */
- if(lp >= lpe)
- break;
- c = *lp;
- if(c >= Runeself) {
- lp += chartorune(&r, (char*)lp);
- c = r;
- } else
- lp++;
- /*
- * do the various mappings.
- * the common case is handled
- * completely by the table.
- */
- if(c != 0 && c < Runeself) {
- c = f->mapto[c];
- if(c) {
- *kp++ = c;
- cl++;
- }
- continue;
- }
- /*
- * for characters out of range,
- * the table does not do Rflag.
- * ignore is based on mapto[nelem(f->mapto)-1]
- */
- if(c != 0 && c < nelem(f->mapto)) {
- c = f->mapto[c];
- if(c == 0)
- continue;
- } else {
- if(f->mapto[nelem(f->mapto)-1] == 0)
- continue;
- /*
- * consider building maps as necessary
- */
- if(f->flags & Fflag)
- c = tolowerrune(tobaserune(c));
- if(f->flags & Dflag && !isalpharune(c) &&
- !isdigitrune(c) && !isspacerune(c))
- continue;
- if((f->flags & Wflag) && isspacerune(c))
- continue;
- }
- /*
- * put it in the key
- */
- r = c;
- n = runetochar((char*)kp, &r);
- kp += n;
- cl += n;
- if(rflag)
- while(n > 0) {
- kp[-n] = ~kp[-n];
- n--;
- }
- }
- /*
- * end of key
- */
- k->klen = cl+1;
- if(rflag) {
- *kp = ~0;
- return;
- }
- *kp = 0;
- }
- void
- dokey_r(Key *k, uint8_t *lp, uint8_t *lpe, Field *f)
- {
- int cl, n;
- uint8_t *kp;
- n = lpe - lp;
- if(n < 0)
- n = 0;
- cl = k->klen;
- kp = k->key + cl;
- k->klen = cl+n+1;
- lpe -= 3;
- while(lp < lpe) {
- kp[0] = ~lp[0];
- kp[1] = ~lp[1];
- kp[2] = ~lp[2];
- kp[3] = ~lp[3];
- kp += 4;
- lp += 4;
- }
- lpe += 3;
- while(lp < lpe)
- *kp++ = ~*lp++;
- *kp = ~0;
- }
- void
- dokey_(Key *k, uint8_t *lp, uint8_t *lpe, Field *f)
- {
- int n, cl;
- uint8_t *kp;
- n = lpe - lp;
- if(n < 0)
- n = 0;
- cl = k->klen;
- kp = k->key + cl;
- k->klen = cl+n+1;
- memmove(kp, lp, n);
- kp[n] = 0;
- }
- void
- buildkey(Line *l)
- {
- Key *k;
- uint8_t *lp, *lpe;
- int ll, kl, cl, i, n;
- Field *f;
- ll = l->llen - 1;
- kl = 0; /* allocated length */
- cl = 0; /* current length */
- k = 0;
- for(i=1; i<=args.nfield; i++) {
- f = &args.field[i];
- lp = skip(l->line, f->beg1, f->beg2, f->flags&B1flag, 0);
- if(lp == 0)
- lp = l->line + ll;
- lpe = skip(l->line, f->end1, f->end2, f->flags&Bflag, 1);
- if(lpe == 0)
- lpe = l->line + ll;
- n = (lpe - lp) + 1;
- if(n <= 0)
- n = 1;
- if(cl+(n+4) > kl) {
- kl = cl+(n+4);
- k = realloc(k, sizeof(Key) +
- (kl-1)*sizeof(k->key[0]));
- if(k == 0)
- nomem();
- }
- k->klen = cl;
- (*f->dokey)(k, lp, lpe, f);
- cl = k->klen;
- }
- /*
- * global comparisons
- */
- if(!(args.uflag && cl > 0)) {
- f = &args.field[0];
- if(cl+(ll+4) > kl) {
- kl = cl+(ll+4);
- k = realloc(k, sizeof(Key) +
- (kl-1)*sizeof(k->key[0]));
- if(k == 0)
- nomem();
- }
- k->klen = cl;
- (*f->dokey)(k, l->line, l->line+ll, f);
- cl = k->klen;
- }
- l->key = k;
- k->klen = cl;
- if(args.vflag) {
- if(write(2, l->line, l->llen) != l->llen)
- exits("write");
- for(i=0; i<k->klen; i++) {
- fprint(2, " %.2x", k->key[i]);
- if(k->key[i] == 0x00 || k->key[i] == 0xff)
- fprint(2, "\n");
- }
- }
- }
- void
- makemapm(Field *f)
- {
- int i, c;
- for(i=0; i<nelem(f->mapto); i++) {
- c = 1;
- if(i == ' ' || i == '\t')
- c = 0;
- if(i >= 'a' && i <= 'z')
- c = i + ('A' - 'a');
- if(i >= 'A' && i <= 'Z')
- c = i;
- f->mapto[i] = c;
- if(args.vflag) {
- if((i & 15) == 0)
- fprint(2, " ");
- fprint(2, " %.2x", c);
- if((i & 15) == 15)
- fprint(2, "\n");
- }
- }
- }
- void
- makemapd(Field *f)
- {
- int i, c;
- for(i=0; i<nelem(f->mapto); i++) {
- c = i;
- if(f->flags & Iflag)
- if(c < 040 || c > 0176)
- c = -1;
- if((f->flags & Wflag) && c >= 0)
- if(c == ' ' || c == '\t')
- c = -1;
- if((f->flags & Dflag) && c >= 0)
- if(!(c == ' ' || c == '\t' ||
- (c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'Z') ||
- (c >= '0' && c <= '9'))){
- if(!isupperrune(c = toupperrune(c)))
- c = -1;
- }
- if((f->flags & Fflag) && c >= 0)
- c = toupperrune(tobaserune(c));
- if((f->flags & Rflag) && c >= 0 && i > 0 && i < Runeself)
- c = ~c & 0xff;
- if(c < 0)
- c = 0;
- f->mapto[i] = c;
- if(args.vflag) {
- if((i & 15) == 0)
- fprint(2, " ");
- fprint(2, " %.2x", c);
- if((i & 15) == 15)
- fprint(2, "\n");
- }
- }
- }
- Rune* month[12] =
- {
- ljan,
- lfeb,
- lmar,
- lapr,
- lmay,
- ljun,
- ljul,
- laug,
- lsep,
- loct,
- lnov,
- ldec,
- };
- /************** radix sort ***********/
- enum
- {
- Threshold = 14,
- };
- void rsort4(Key***, uint32_t, int);
- void bsort4(Key***, uint32_t, int);
- void
- sort4(void *a, uint32_t n)
- {
- if(n > Threshold)
- rsort4((Key***)a, n, 0);
- else
- bsort4((Key***)a, n, 0);
- }
- void
- rsort4(Key ***a, uint32_t n, int b)
- {
- Key ***ea, ***t, ***u, **t1, **u1, *k;
- Key ***part[257];
- static int32_t count[257];
- int32_t clist[257+257], *cp, *cp1;
- int c, lowc, higc;
- /*
- * pass 1 over all keys,
- * count the number of each key[b].
- * find low count and high count.
- */
- lowc = 256;
- higc = 0;
- ea = a+n;
- for(t=a; t<ea; t++) {
- k = **t;
- n = k->klen;
- if(b >= n) {
- count[256]++;
- continue;
- }
- c = k->key[b];
- n = count[c]++;
- if(n == 0) {
- if(c < lowc)
- lowc = c;
- if(c > higc)
- higc = c;
- }
- }
- /*
- * pass 2 over all counts,
- * put partition pointers in part[c].
- * save compacted indexes and counts
- * in clist[].
- */
- t = a;
- n = count[256];
- clist[0] = n;
- part[256] = t;
- t += n;
- cp1 = clist+1;
- cp = count+lowc;
- for(c=lowc; c<=higc; c++,cp++) {
- n = *cp;
- if(n) {
- cp1[0] = n;
- cp1[1] = c;
- cp1 += 2;
- part[c] = t;
- t += n;
- }
- }
- *cp1 = 0;
- /*
- * pass 3 over all counts.
- * chase lowest pointer in each partition
- * around a permutation until it comes
- * back and is stored where it started.
- * static array, count[], should be
- * reduced to zero entries except maybe
- * count[256].
- */
- for(cp1=clist+1; cp1[0]; cp1+=2) {
- c = cp1[1];
- cp = count+c;
- while(*cp) {
- t1 = *part[c];
- for(;;) {
- k = *t1;
- n = 256;
- if(b < k->klen)
- n = k->key[b];
- u = part[n]++;
- count[n]--;
- u1 = *u;
- *u = t1;
- if(n == c)
- break;
- t1 = u1;
- }
- }
- }
- /*
- * pass 4 over all partitions.
- * call recursively.
- */
- b++;
- t = a + clist[0];
- count[256] = 0;
- for(cp1=clist+1; n=cp1[0]; cp1+=2) {
- if(n > Threshold)
- rsort4(t, n, b);
- else
- if(n > 1)
- bsort4(t, n, b);
- t += n;
- }
- }
- /*
- * bubble sort to pick up
- * the pieces.
- */
- void
- bsort4(Key ***a, uint32_t n, int b)
- {
- Key ***i, ***j, ***k, ***l, **t;
- Key *ka, *kb;
- int n1, n2;
- l = a+n;
- j = a;
- loop:
- i = j;
- j++;
- if(j >= l)
- return;
- ka = **i;
- kb = **j;
- n1 = ka->klen - b;
- n2 = kb->klen - b;
- if(n1 > n2)
- n1 = n2;
- if(n1 <= 0)
- goto loop;
- n2 = ka->key[b] - kb->key[b];
- if(n2 == 0)
- n2 = memcmp(ka->key+b, kb->key+b, n1);
- if(n2 <= 0)
- goto loop;
- for(;;) {
- k = i+1;
- t = *k;
- *k = *i;
- *i = t;
- if(i <= a)
- goto loop;
- i--;
- ka = **i;
- kb = *t;
- n1 = ka->klen - b;
- n2 = kb->klen - b;
- if(n1 > n2)
- n1 = n2;
- if(n1 <= 0)
- goto loop;
- n2 = ka->key[b] - kb->key[b];
- if(n2 == 0)
- n2 = memcmp(ka->key+b, kb->key+b, n1);
- if(n2 <= 0)
- goto loop;
- }
- }
|