/* join: join the lines of two files, F1 and F2, on a common field */
- #include <u.h>
- #include <libc.h>
- #include <stdio.h>
- #include <ctype.h>
- #define F1 0
- #define F2 1
- #define F0 3
- #define NFLD 100 /* max field per line */
- #define comp() runecmp(ppi[F1][j1],ppi[F2][j2])
- FILE *f[2];
- Rune buf[2][BUFSIZ]; /*input lines */
- Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */
- Rune *s1,*s2;
- int j1 = 1; /* join of this field of file 1 */
- int j2 = 1; /* join of this field of file 2 */
- int olist[2*NFLD]; /* output these fields */
- int olistf[2*NFLD]; /* from these files */
- int no; /* number of entries in olist */
- Rune sep1 = ' '; /* default field separator */
- Rune sep2 = '\t';
- char *sepstr=" ";
- int discard; /* count of truncated lines */
- Rune null[BUFSIZ] = L"";
- int a1;
- int a2;
- char *getoptarg(int*, char***);
- void output(int, int);
- int input(int);
- void oparse(char*);
- void error(char*, char*);
- void seek1(void), seek2(void);
- Rune *strtorune(Rune *, char *);
- void
- main(int argc, char **argv)
- {
- int i;
- while (argc > 1 && argv[1][0] == '-') {
- if (argv[1][1] == '\0')
- break;
- switch (argv[1][1]) {
- case '-':
- argc--;
- argv++;
- goto proceed;
- case 'a':
- switch(*getoptarg(&argc, &argv)) {
- case '1':
- a1++;
- break;
- case '2':
- a2++;
- break;
- default:
- error("incomplete option -a","");
- }
- break;
- case 'e':
- strtorune(null, getoptarg(&argc, &argv));
- break;
- case 't':
- sepstr=getoptarg(&argc, &argv);
- chartorune(&sep1, sepstr);
- sep2 = sep1;
- break;
- case 'o':
- if(argv[1][2]!=0 ||
- argc>2 && strchr(argv[2],',')!=0)
- oparse(getoptarg(&argc, &argv));
- else for (no = 0; no<2*NFLD && argc>2; no++){
- if (argv[2][0] == '1' && argv[2][1] == '.') {
- olistf[no] = F1;
- olist[no] = atoi(&argv[2][2]);
- } else if (argv[2][0] == '2' && argv[2][1] == '.') {
- olist[no] = atoi(&argv[2][2]);
- olistf[no] = F2;
- } else if (argv[2][0] == '0')
- olistf[no] = F0;
- else
- break;
- argc--;
- argv++;
- }
- break;
- case 'j':
- if(argc <= 2)
- break;
- if (argv[1][2] == '1')
- j1 = atoi(argv[2]);
- else if (argv[1][2] == '2')
- j2 = atoi(argv[2]);
- else
- j1 = j2 = atoi(argv[2]);
- argc--;
- argv++;
- break;
- case '1':
- j1 = atoi(getoptarg(&argc, &argv));
- break;
- case '2':
- j2 = atoi(getoptarg(&argc, &argv));
- break;
- }
- argc--;
- argv++;
- }
- proceed:
- for (i = 0; i < no; i++)
- if (olist[i]-- > NFLD) /* 0 origin */
- error("field number too big in -o","");
- if (argc != 3)
- error("usage: join [-1 x -2 y] [-o list] file1 file2","");
- j1--;
- j2--; /* everyone else believes in 0 origin */
- s1 = ppi[F1][j1];
- s2 = ppi[F2][j2];
- if (strcmp(argv[1], "-") == 0)
- f[F1] = stdin;
- else if ((f[F1] = fopen(argv[1], "r")) == 0)
- error("can't open %s", argv[1]);
- if(strcmp(argv[2], "-") == 0) {
- f[F2] = stdin;
- } else if ((f[F2] = fopen(argv[2], "r")) == 0)
- error("can't open %s", argv[2]);
- if(ftell(f[F2]) >= 0)
- seek2();
- else if(ftell(f[F1]) >= 0)
- seek1();
- else
- error("neither file is randomly accessible","");
- if (discard)
- error("some input line was truncated", "");
- exits("");
- }
- int runecmp(Rune *a, Rune *b){
- while(*a==*b){
- if(*a=='\0') return 0;
- a++;
- b++;
- }
- if(*a<*b) return -1;
- return 1;
- }
- char *runetostr(char *buf, Rune *r){
- char *s;
- for(s=buf;*r;r++) s+=runetochar(s, r);
- *s='\0';
- return buf;
- }
- Rune *strtorune(Rune *buf, char *s){
- Rune *r;
- for(r=buf;*s;r++) s+=chartorune(r, s);
- *r='\0';
- return buf;
- }
/*
 * Read the next line of file 1 (or file 2) and store its field count in
 * the caller's local n1 (or n2).
 * Lazy: there ought to be a clean way to combine seek1 and seek2.
 */
#define	get1()	(n1 = input(F1))
#define	get2()	(n2 = input(F2))
- void
- seek2()
- {
- int n1, n2;
- int top2=0;
- int bot2 = ftell(f[F2]);
- get1();
- get2();
- while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
- if(n1>0 && n2>0 && comp()>0 || n1==0) {
- if(a2) output(0, n2);
- bot2 = ftell(f[F2]);
- get2();
- } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
- if(a1) output(n1, 0);
- get1();
- } else /*(n1>0 && n2>0 && comp()==0)*/ {
- while(n2>0 && comp()==0) {
- output(n1, n2);
- top2 = ftell(f[F2]);
- get2();
- }
- fseek(f[F2], bot2, 0);
- get2();
- get1();
- for(;;) {
- if(n1>0 && n2>0 && comp()==0) {
- output(n1, n2);
- get2();
- } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
- fseek(f[F2], bot2, 0);
- get2();
- get1();
- } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
- fseek(f[F2], top2, 0);
- bot2 = top2;
- get2();
- break;
- }
- }
- }
- }
- }
- void
- seek1()
- {
- int n1, n2;
- int top1=0;
- int bot1 = ftell(f[F1]);
- get1();
- get2();
- while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
- if(n1>0 && n2>0 && comp()>0 || n1==0) {
- if(a2) output(0, n2);
- get2();
- } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
- if(a1) output(n1, 0);
- bot1 = ftell(f[F1]);
- get1();
- } else /*(n1>0 && n2>0 && comp()==0)*/ {
- while(n2>0 && comp()==0) {
- output(n1, n2);
- top1 = ftell(f[F1]);
- get1();
- }
- fseek(f[F1], bot1, 0);
- get2();
- get1();
- for(;;) {
- if(n1>0 && n2>0 && comp()==0) {
- output(n1, n2);
- get1();
- } else if(n1>0 && n2>0 && comp()>0 || n1==0) {
- fseek(f[F1], bot1, 0);
- get2();
- get1();
- } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
- fseek(f[F1], top1, 0);
- bot1 = top1;
- get1();
- break;
- }
- }
- }
- }
- }
- int
- input(int n) /* get input line and split into fields */
- {
- register int i, c;
- Rune *bp;
- Rune **pp;
- char line[BUFSIZ];
- bp = buf[n];
- pp = ppi[n];
- if (fgets(line, BUFSIZ, f[n]) == 0)
- return(0);
- strtorune(bp, line);
- i = 0;
- do {
- i++;
- if (sep1 == ' ') /* strip multiples */
- while ((c = *bp) == sep1 || c == sep2)
- bp++; /* skip blanks */
- *pp++ = bp; /* record beginning */
- while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
- bp++;
- *bp++ = '\0'; /* mark end by overwriting blank */
- } while (c != '\n' && c != '\0' && i < NFLD-1);
- if (c != '\n')
- discard++;
- *pp = 0;
- return(i);
- }
- void
- output(int on1, int on2) /* print items from olist */
- {
- int i;
- Rune *temp;
- char buf[BUFSIZ];
- if (no <= 0) { /* default case */
- printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
- for (i = 0; i < on1; i++)
- if (i != j1)
- printf("%s%s", sepstr, runetostr(buf, ppi[F1][i]));
- for (i = 0; i < on2; i++)
- if (i != j2)
- printf("%s%s", sepstr, runetostr(buf, ppi[F2][i]));
- printf("\n");
- } else {
- for (i = 0; i < no; i++) {
- if (olistf[i]==F0 && on1>j1)
- temp = ppi[F1][j1];
- else if (olistf[i]==F0 && on2>j2)
- temp = ppi[F2][j2];
- else {
- temp = ppi[olistf[i]][olist[i]];
- if(olistf[i]==F1 && on1<=olist[i] ||
- olistf[i]==F2 && on2<=olist[i] ||
- *temp==0)
- temp = null;
- }
- printf("%s", runetostr(buf, temp));
- if (i == no - 1)
- printf("\n");
- else
- printf("%s", sepstr);
- }
- }
- }
/*
 * Print "join: " followed by a message on standard error and exit.
 * s1 is used as a printf format with s2 as its single argument, and is
 * also passed to exits as the exit status.
 */
void
error(char *s1, char *s2)
{
	fputs("join: ", stderr);
	fprintf(stderr, s1, s2);
	fputc('\n', stderr);
	exits(s1);
}
/*
 * Return the argument of the option in argv[1].
 *
 * If text follows the option letter in the same word, that text is the
 * argument and the caller's argc/argv are left alone.  Otherwise the
 * argument is the next word: it must exist and must not start with '-'
 * (else error() is called), and the caller's argc/argv are advanced by
 * one so that the argument becomes argv[1].
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	char **av = *argvp;
	int ac = *argcp;

	if (av[1][2] != 0)
		return av[1] + 2;
	if (ac <= 2 || av[2][0] == '-')
		error("incomplete option %s", av[1]);
	*argcp = ac - 1;
	*argvp = av + 1;
	return av[2];
}
- void
- oparse(char *s)
- {
- for (no = 0; no<2*NFLD && *s; no++, s++) {
- switch(*s) {
- case 0:
- return;
- case '0':
- olistf[no] = F0;
- break;
- case '1':
- case '2':
- if(s[1] == '.' && isdigit(s[2])) {
- olistf[no] = *s=='1'? F1: F2;
- olist[no] = atoi(s += 2);
- break;
- } /* fall thru */
- default:
- error("invalid -o list", "");
- }
- if(s[1] == ',')
- s++;
- }
- }
|