|
@@ -1,45 +1,57 @@
|
|
|
/* join F1 F2 on stuff */
|
|
|
#include <u.h>
|
|
|
#include <libc.h>
|
|
|
-#include <stdio.h>
|
|
|
+#include <bio.h>
|
|
|
#include <ctype.h>
|
|
|
|
|
|
-#define F1 0
|
|
|
-#define F2 1
|
|
|
-#define F0 3
|
|
|
+enum {
|
|
|
+ F1,
|
|
|
+ F2,
|
|
|
+ NIN,
|
|
|
+ F0,
|
|
|
+};
|
|
|
+
|
|
|
#define NFLD 100 /* max field per line */
|
|
|
-#define comp() runecmp(ppi[F1][j1],ppi[F2][j2])
|
|
|
+#define comp() runestrcmp(ppi[F1][j1], ppi[F2][j2])
|
|
|
|
|
|
-FILE *f[2];
|
|
|
-Rune buf[2][BUFSIZ]; /*input lines */
|
|
|
-Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */
|
|
|
-Rune *s1,*s2;
|
|
|
-int j1 = 1; /* join of this field of file 1 */
|
|
|
-int j2 = 1; /* join of this field of file 2 */
|
|
|
-int olist[2*NFLD]; /* output these fields */
|
|
|
-int olistf[2*NFLD]; /* from these files */
|
|
|
-int no; /* number of entries in olist */
|
|
|
+Biobuf *f[NIN];
|
|
|
+Rune buf[NIN][Bsize]; /* input lines */
|
|
|
+Rune *ppi[NIN][NFLD+1]; /* pointers to fields in lines */
|
|
|
Rune sep1 = ' '; /* default field separator */
|
|
|
Rune sep2 = '\t';
|
|
|
-char *sepstr=" ";
|
|
|
-int discard; /* count of truncated lines */
|
|
|
-Rune null[BUFSIZ] = L"";
|
|
|
+int j1 = 1; /* join of this field of file 1 */
|
|
|
+int j2 = 1; /* join of this field of file 2 */
|
|
|
int a1;
|
|
|
int a2;
|
|
|
|
|
|
-char *getoptarg(int*, char***);
|
|
|
-void output(int, int);
|
|
|
-int input(int);
|
|
|
-void oparse(char*);
|
|
|
-void error(char*, char*);
|
|
|
-void seek1(void), seek2(void);
|
|
|
-Rune *strtorune(Rune *, char *);
|
|
|
+int olist[NIN*NFLD]; /* output these fields */
|
|
|
+int olistf[NIN*NFLD]; /* from these files */
|
|
|
+int no; /* number of entries in olist */
|
|
|
+char *sepstr = " ";
|
|
|
+int discard; /* count of truncated lines */
|
|
|
+Rune null[Bsize] = L"";
|
|
|
+Biobuf binbuf, boutbuf;
|
|
|
+Biobuf *bin, *bout;
|
|
|
+
|
|
|
+char *getoptarg(int*, char***);
|
|
|
+int input(int);
|
|
|
+void join(int);
|
|
|
+void oparse(char*);
|
|
|
+void output(int, int);
|
|
|
+Rune *strtorune(Rune *, char *);
|
|
|
|
|
|
void
|
|
|
main(int argc, char **argv)
|
|
|
{
|
|
|
int i;
|
|
|
+ vlong off1, off2;
|
|
|
+
|
|
|
+ bin = &binbuf;
|
|
|
+ bout = &boutbuf;
|
|
|
+ Binit(bin, 0, OREAD);
|
|
|
+ Binit(bout, 1, OWRITE);
|
|
|
|
|
|
+ argv0 = argv[0];
|
|
|
while (argc > 1 && argv[1][0] == '-') {
|
|
|
if (argv[1][1] == '\0')
|
|
|
break;
|
|
@@ -57,7 +69,7 @@ main(int argc, char **argv)
|
|
|
a2++;
|
|
|
break;
|
|
|
default:
|
|
|
- error("incomplete option -a","");
|
|
|
+ sysfatal("incomplete option -a");
|
|
|
}
|
|
|
break;
|
|
|
case 'e':
|
|
@@ -112,49 +124,41 @@ main(int argc, char **argv)
|
|
|
proceed:
|
|
|
for (i = 0; i < no; i++)
|
|
|
if (olist[i]-- > NFLD) /* 0 origin */
|
|
|
- error("field number too big in -o","");
|
|
|
- if (argc != 3)
|
|
|
- error("usage: join [-1 x -2 y] [-o list] file1 file2","");
|
|
|
+ sysfatal("field number too big in -o");
|
|
|
+ if (argc != 3) {
|
|
|
+ fprint(2, "usage: join [-1 x -2 y] [-o list] file1 file2\n");
|
|
|
+ exits("usage");
|
|
|
+ }
|
|
|
if (j1 < 1 || j2 < 1)
|
|
|
- error("invalid field indices", "");
|
|
|
+ sysfatal("invalid field indices");
|
|
|
j1--;
|
|
|
j2--; /* everyone else believes in 0 origin */
|
|
|
- s1 = ppi[F1][j1];
|
|
|
- s2 = ppi[F2][j2];
|
|
|
+
|
|
|
if (strcmp(argv[1], "-") == 0)
|
|
|
- f[F1] = stdin;
|
|
|
- else if ((f[F1] = fopen(argv[1], "r")) == 0)
|
|
|
- error("can't open %s", argv[1]);
|
|
|
- if(strcmp(argv[2], "-") == 0) {
|
|
|
- f[F2] = stdin;
|
|
|
- } else if ((f[F2] = fopen(argv[2], "r")) == 0)
|
|
|
- error("can't open %s", argv[2]);
|
|
|
+ f[F1] = bin;
|
|
|
+ else if ((f[F1] = Bopen(argv[1], OREAD)) == 0)
|
|
|
+ sysfatal("can't open %s: %r", argv[1]);
|
|
|
+ if(strcmp(argv[2], "-") == 0)
|
|
|
+ f[F2] = bin;
|
|
|
+ else if ((f[F2] = Bopen(argv[2], OREAD)) == 0)
|
|
|
+ sysfatal("can't open %s: %r", argv[2]);
|
|
|
|
|
|
- if(ftell(f[F2]) >= 0)
|
|
|
- seek2();
|
|
|
- else if(ftell(f[F1]) >= 0)
|
|
|
- seek1();
|
|
|
- else
|
|
|
- error("neither file is randomly accessible","");
|
|
|
+ off1 = Boffset(f[F1]);
|
|
|
+ off2 = Boffset(f[F2]);
|
|
|
+ if(Bseek(f[F2], 0, 2) >= 0){
|
|
|
+ Bseek(f[F2], off2, 0);
|
|
|
+ join(F2);
|
|
|
+ }else if(Bseek(f[F1], 0, 2) >= 0){
|
|
|
+ Bseek(f[F1], off1, 0);
|
|
|
+ Bseek(f[F2], off2, 0);
|
|
|
+ join(F1);
|
|
|
+ }else
|
|
|
+ sysfatal("neither file is randomly accessible");
|
|
|
if (discard)
|
|
|
- error("some input line was truncated", "");
|
|
|
+ sysfatal("some input line was truncated");
|
|
|
exits("");
|
|
|
}
|
|
|
|
|
|
-int
|
|
|
-runecmp(Rune *a, Rune *b)
|
|
|
-{
|
|
|
- while(*a == *b) {
|
|
|
- if(*a == '\0')
|
|
|
- return 0;
|
|
|
- a++;
|
|
|
- b++;
|
|
|
- }
|
|
|
- if(*a < *b)
|
|
|
- return -1;
|
|
|
- return 1;
|
|
|
-}
|
|
|
-
|
|
|
char *
|
|
|
runetostr(char *buf, Rune *r)
|
|
|
{
|
|
@@ -177,90 +181,71 @@ strtorune(Rune *buf, char *s)
|
|
|
return buf;
|
|
|
}
|
|
|
|
|
|
-/* lazy. there ought to be a clean way to combine seek1 & seek2 */
|
|
|
-#define get1() n1=input(F1)
|
|
|
-#define get2() n2=input(F2)
|
|
|
+void
|
|
|
+readboth(int n[]) /* read the next line of each input; n[F1] and n[F2] receive input()'s return values */
|
|
|
+{
|
|
|
+ n[F1] = input(F1);
|
|
|
+ n[F2] = input(F2);
|
|
|
+}
|
|
|
|
|
|
void
|
|
|
-seek2()
|
|
|
+seekbotreadboth(int seekf, vlong bot, int n[]) /* reposition file seekf at offset bot, then read a line from both files into n[] */
|
|
|
{
|
|
|
- int n1, n2;
|
|
|
- int top2=0;
|
|
|
- int bot2 = ftell(f[F2]);
|
|
|
- get1();
|
|
|
- get2();
|
|
|
- while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
|
|
|
- if(n1>0 && n2>0 && comp()>0 || n1==0) {
|
|
|
- if(a2) output(0, n2);
|
|
|
- bot2 = ftell(f[F2]);
|
|
|
- get2();
|
|
|
- } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
|
|
|
- if(a1) output(n1, 0);
|
|
|
- get1();
|
|
|
- } else /*(n1>0 && n2>0 && comp()==0)*/ {
|
|
|
- while(n2>0 && comp()==0) {
|
|
|
- output(n1, n2);
|
|
|
- top2 = ftell(f[F2]);
|
|
|
- get2();
|
|
|
- }
|
|
|
- fseek(f[F2], bot2, 0);
|
|
|
- get2();
|
|
|
- get1();
|
|
|
- for(;;) {
|
|
|
- if(n1>0 && n2>0 && comp()==0) {
|
|
|
- output(n1, n2);
|
|
|
- get2();
|
|
|
- } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
|
|
|
- fseek(f[F2], bot2, 0);
|
|
|
- get2();
|
|
|
- get1();
|
|
|
- } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
|
|
|
- fseek(f[F2], top2, 0);
|
|
|
- bot2 = top2;
|
|
|
- get2();
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ Bseek(f[seekf], bot, 0); /* only f[seekf] is repositioned; the other file keeps its current position */
|
|
|
+ readboth(n); /* so f[seekf] rereads from bot while the other file moves on to its next line */
|
|
|
}
|
|
|
+
|
|
|
void
|
|
|
-seek1()
|
|
|
+join(int seekf)
|
|
|
{
|
|
|
- int n1, n2;
|
|
|
- int top1=0;
|
|
|
- int bot1 = ftell(f[F1]);
|
|
|
- get1();
|
|
|
- get2();
|
|
|
- while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
|
|
|
- if(n1>0 && n2>0 && comp()>0 || n1==0) {
|
|
|
- if(a2) output(0, n2);
|
|
|
- get2();
|
|
|
- } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
|
|
|
- if(a1) output(n1, 0);
|
|
|
- bot1 = ftell(f[F1]);
|
|
|
- get1();
|
|
|
- } else /*(n1>0 && n2>0 && comp()==0)*/ {
|
|
|
- while(n2>0 && comp()==0) {
|
|
|
- output(n1, n2);
|
|
|
- top1 = ftell(f[F1]);
|
|
|
- get1();
|
|
|
+ int cmp, less;
|
|
|
+ int n[NIN];
|
|
|
+ vlong top, bot;
|
|
|
+
|
|
|
+ less = seekf == F2;
|
|
|
+ top = 0;
|
|
|
+ bot = Boffset(f[seekf]);
|
|
|
+ readboth(n);
|
|
|
+ while(n[F1]>0 && n[F2]>0 || (a1||a2) && n[F1]+n[F2]>0) {
|
|
|
+ cmp = comp();
|
|
|
+ if(n[F1]>0 && n[F2]>0 && cmp>0 || n[F1]==0) {
|
|
|
+ if(a2)
|
|
|
+ output(0, n[F2]);
|
|
|
+ if (seekf == F2)
|
|
|
+ bot = Boffset(f[seekf]);
|
|
|
+ n[F2] = input(F2);
|
|
|
+ } else if(n[F1]>0 && n[F2]>0 && cmp<0 || n[F2]==0) {
|
|
|
+ if(a1)
|
|
|
+ output(n[F1], 0);
|
|
|
+ if (seekf == F1)
|
|
|
+ bot = Boffset(f[seekf]);
|
|
|
+ n[F1] = input(F1);
|
|
|
+ } else {
|
|
|
+ /* n[F1]>0 && n[F2]>0 && cmp==0 */
|
|
|
+ while(n[F2]>0 && cmp==0) {
|
|
|
+ output(n[F1], n[F2]);
|
|
|
+ top = Boffset(f[seekf]);
|
|
|
+ n[seekf] = input(seekf);
|
|
|
+ cmp = comp();
|
|
|
}
|
|
|
- fseek(f[F1], bot1, 0);
|
|
|
- get2();
|
|
|
- get1();
|
|
|
+ seekbotreadboth(seekf, bot, n);
|
|
|
for(;;) {
|
|
|
- if(n1>0 && n2>0 && comp()==0) {
|
|
|
- output(n1, n2);
|
|
|
- get1();
|
|
|
- } else if(n1>0 && n2>0 && comp()>0 || n1==0) {
|
|
|
- fseek(f[F1], bot1, 0);
|
|
|
- get2();
|
|
|
- get1();
|
|
|
- } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
|
|
|
- fseek(f[F1], top1, 0);
|
|
|
- bot1 = top1;
|
|
|
- get1();
|
|
|
+ cmp = comp();
|
|
|
+ if(n[F1]>0 && n[F2]>0 && cmp==0) {
|
|
|
+ output(n[F1], n[F2]);
|
|
|
+ n[seekf] = input(seekf);
|
|
|
+ } else if(n[F1]>0 && n[F2]>0 &&
|
|
|
+ (less? cmp<0 :cmp>0) || n[seekf]==0)
|
|
|
+ seekbotreadboth(seekf, bot, n);
|
|
|
+ else {
|
|
|
+ /*
|
|
|
+ * n[F1]>0 && n[F2]>0 &&
|
|
|
+ * (less? cmp>0 :cmp<0) ||
|
|
|
+ * n[seekf==F1? F2: F1]==0
|
|
|
+ */
|
|
|
+ Bseek(f[seekf], top, 0);
|
|
|
+ bot = top;
|
|
|
+ n[seekf] = input(seekf);
|
|
|
break;
|
|
|
}
|
|
|
}
|
|
@@ -271,50 +256,63 @@ seek1()
|
|
|
int
|
|
|
input(int n) /* get input line and split into fields */
|
|
|
{
|
|
|
- int i, c;
|
|
|
+ int c, i, len;
|
|
|
+ char *line;
|
|
|
Rune *bp;
|
|
|
Rune **pp;
|
|
|
- char line[BUFSIZ];
|
|
|
|
|
|
|
bp = buf[n];
|
|
|
pp = ppi[n];
|
|
|
- if (fgets(line, BUFSIZ, f[n]) == 0)
|
|
|
+ line = Brdline(f[n], '\n'); /* ask bio for the next line of file n, delimited by '\n' */
|
|
|
+ if (line == nil) /* no line obtained: return 0 fields, leaving buf[n] and ppi[n] as they were */
|
|
|
return(0);
|
|
|
+ len = Blinelen(f[n]) - 1; /* index of the line's last byte, where the delimiter sits */
|
|
|
+ c = line[len]; /* save that byte so it can be put back below */
|
|
|
+ line[len] = '\0'; /* terminate the line in place for strtorune */
|
|
|
strtorune(bp, line);
|
|
|
+ line[len] = c; /* restore delimiter */
|
|
|
+ if (c != '\n') /* NOTE(review): Brdline was given '\n' as its delimiter; confirm this can ever be true */
|
|
|
+ discard++; /* main() reports a nonzero count as a truncated input line */
|
|
|
+
|
|
|
i = 0;
|
|
|
do {
|
|
|
i++;
|
|
|
if (sep1 == ' ') /* strip multiples */
|
|
|
while ((c = *bp) == sep1 || c == sep2)
|
|
|
bp++; /* skip blanks */
|
|
|
- *pp++ = bp; /* record beginning */
|
|
|
- while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
|
|
|
+ *pp++ = bp; /* record beginning */
|
|
|
+ while ((c = *bp) != sep1 && c != sep2 && c != '\0') /* advance to the separator or NUL that ends this field */
|
|
|
bp++;
|
|
|
- *bp++ = '\0'; /* mark end by overwriting blank */
|
|
|
- } while (c != '\n' && c != '\0' && i < NFLD-1);
|
|
|
- if (c != '\n')
|
|
|
- discard++;
|
|
|
+ *bp++ = '\0'; /* mark end by overwriting blank */
|
|
|
+ } while (c != '\0' && i < NFLD-1); /* stop at end of line, or once NFLD-1 fields are recorded */
|
|
|
|
|
|
|
*pp = 0;
|
|
|
return(i);
|
|
|
}
|
|
|
|
|
|
+void
|
|
|
+prfields(int f, int on, int jn) /* print fields 0..on-1 of file f, except field jn, each preceded by sepstr */
|
|
|
+{
|
|
|
+ int i; /* note: parameter f (a file index) shadows the file-scope Biobuf array f[] in this function */
|
|
|
+ char buf[Bsize]; /* char buffer handed to runetostr; shadows the file-scope Rune buf[][] */
|
|
|
+
|
|
|
+ for (i = 0; i < on; i++)
|
|
|
+ if (i != jn) /* output() passes the join field here, having already printed it first */
|
|
|
+ Bprint(bout, "%s%s", sepstr, runetostr(buf, ppi[f][i]));
|
|
|
+}
|
|
|
+
|
|
|
void
|
|
|
output(int on1, int on2) /* print items from olist */
|
|
|
{
|
|
|
int i;
|
|
|
Rune *temp;
|
|
|
- char buf[BUFSIZ];
|
|
|
+ char buf[Bsize];
|
|
|
|
|
|
if (no <= 0) { /* default case */
|
|
|
- printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
|
|
|
- for (i = 0; i < on1; i++)
|
|
|
- if (i != j1)
|
|
|
- printf("%s%s", sepstr, runetostr(buf, ppi[F1][i]));
|
|
|
- for (i = 0; i < on2; i++)
|
|
|
- if (i != j2)
|
|
|
- printf("%s%s", sepstr, runetostr(buf, ppi[F2][i]));
|
|
|
- printf("\n");
|
|
|
+ Bprint(bout, "%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
|
|
|
+ prfields(F1, on1, j1);
|
|
|
+ prfields(F2, on2, j2);
|
|
|
+ Bputc(bout, '\n');
|
|
|
} else {
|
|
|
for (i = 0; i < no; i++) {
|
|
|
if (olistf[i]==F0 && on1>j1)
|
|
@@ -328,24 +326,15 @@ output(int on1, int on2) /* print items from olist */
|
|
|
*temp==0)
|
|
|
temp = null;
|
|
|
}
|
|
|
- printf("%s", runetostr(buf, temp));
|
|
|
+ Bprint(bout, "%s", runetostr(buf, temp));
|
|
|
if (i == no - 1)
|
|
|
- printf("\n");
|
|
|
+ Bputc(bout, '\n');
|
|
|
else
|
|
|
- printf("%s", sepstr);
|
|
|
+ Bprint(bout, "%s", sepstr);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-void
|
|
|
-error(char *s1, char *s2)
|
|
|
-{
|
|
|
- fprintf(stderr, "join: ");
|
|
|
- fprintf(stderr, s1, s2);
|
|
|
- fprintf(stderr, "\n");
|
|
|
- exits(s1);
|
|
|
-}
|
|
|
-
|
|
|
char *
|
|
|
getoptarg(int *argcp, char ***argvp)
|
|
|
{
|
|
@@ -354,7 +343,7 @@ getoptarg(int *argcp, char ***argvp)
|
|
|
if(argv[1][2] != 0)
|
|
|
return &argv[1][2];
|
|
|
if(argc<=2 || argv[2][0]=='-')
|
|
|
- error("incomplete option %s", argv[1]);
|
|
|
+ sysfatal("incomplete option %s", argv[1]);
|
|
|
*argcp = argc-1;
|
|
|
*argvp = ++argv;
|
|
|
return argv[1];
|
|
@@ -379,7 +368,7 @@ oparse(char *s)
|
|
|
}
|
|
|
/* fall thru */
|
|
|
default:
|
|
|
- error("invalid -o list", "");
|
|
|
+ sysfatal("invalid -o list");
|
|
|
}
|
|
|
if(s[1] == ',')
|
|
|
s++;
|