split.c 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include <regexp.h>
  5. char digit[] = "0123456789";
  6. char *suffix = "";
  7. char *stem = "x";
  8. char suff[] = "aa";
  9. char name[200];
  10. Biobuf bout;
  11. Biobuf *output = &bout;
  12. extern int nextfile(void);
  13. extern int matchfile(Resub*);
  14. extern void openf(void);
  15. extern char *fold(char*,int);
  16. extern void usage(void);
  17. extern void badexp(void);
  18. void
  19. main(int argc, char *argv[])
  20. {
  21. Reprog *exp;
  22. char *pattern = 0;
  23. int n = 1000;
  24. char *line;
  25. int xflag = 0;
  26. int iflag = 0;
  27. Biobuf bin;
  28. Biobuf *b = &bin;
  29. char buf[256];
  30. ARGBEGIN {
  31. case 'l':
  32. case 'n':
  33. n=atoi(EARGF(usage()));
  34. break;
  35. case 'e':
  36. pattern = strdup(EARGF(usage()));
  37. break;
  38. case 'f':
  39. stem = strdup(EARGF(usage()));
  40. break;
  41. case 's':
  42. suffix = strdup(EARGF(usage()));
  43. break;
  44. case 'x':
  45. xflag++;
  46. break;
  47. case 'i':
  48. iflag++;
  49. break;
  50. default:
  51. usage();
  52. break;
  53. } ARGEND;
  54. if(argc < 0 || argc > 1)
  55. usage();
  56. if(argc != 0) {
  57. b = Bopen(argv[0], OREAD);
  58. if(b == nil) {
  59. fprint(2, "split: can't open %s: %r\n", argv[0]);
  60. exits("open");
  61. }
  62. } else
  63. Binit(b, 0, OREAD);
  64. if(pattern) {
  65. if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern)))
  66. badexp();
  67. while((line=Brdline(b,'\n')) != 0) {
  68. Resub match[2];
  69. memset(match, 0, sizeof match);
  70. line[Blinelen(b)-1] = 0;
  71. if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) {
  72. if(matchfile(match) && xflag)
  73. continue;
  74. } else if(output == 0)
  75. nextfile(); /* at most once */
  76. Bwrite(output, line, Blinelen(b)-1);
  77. Bputc(output, '\n');
  78. }
  79. } else {
  80. int linecnt = n;
  81. while((line=Brdline(b,'\n')) != 0) {
  82. if(++linecnt > n) {
  83. nextfile();
  84. linecnt = 1;
  85. }
  86. Bwrite(output, line, Blinelen(b));
  87. }
  88. /*
  89. * in case we didn't end with a newline, tack whatever's
  90. * left onto the last file
  91. */
  92. while((n = Bread(b, buf, sizeof(buf))) > 0)
  93. Bwrite(output, buf, n);
  94. }
  95. if(b != nil)
  96. Bterm(b);
  97. exits(0);
  98. }
  99. int
  100. nextfile(void)
  101. {
  102. static canopen = 1;
  103. if(suff[0] > 'z') {
  104. if(canopen)
  105. fprint(2, "split: file %szz not split\n",stem);
  106. canopen = 0;
  107. } else {
  108. strcpy(name, stem);
  109. strcat(name, suff);
  110. if(++suff[1] > 'z')
  111. suff[1] = 'a', ++suff[0];
  112. openf();
  113. }
  114. return canopen;
  115. }
  116. int
  117. matchfile(Resub *match)
  118. {
  119. if(match[1].sp) {
  120. int len = match[1].ep - match[1].sp;
  121. strncpy(name, match[1].sp, len);
  122. strcpy(name+len, suffix);
  123. openf();
  124. return 1;
  125. }
  126. return nextfile();
  127. }
  128. void
  129. openf(void)
  130. {
  131. static int fd = 0;
  132. Bflush(output);
  133. Bterm(output);
  134. if(fd > 0)
  135. close(fd);
  136. fd = create(name,OWRITE,0666);
  137. if(fd < 0) {
  138. fprint(2, "grep: can't create %s: %r\n", name);
  139. exits("create");
  140. }
  141. Binit(output, fd, OWRITE);
  142. }
  /*
   * Return a lower-cased copy of the string s.
   *
   * n is the length of s; at most n bytes are copied (fewer if s ends
   * sooner) and the copy is always NUL-terminated.  The copy lives in a
   * single static buffer that is grown as needed, so each call
   * overwrites the previous result and the caller must not free it.
   * Running out of memory aborts the program.
   */
  char *
  fold(char *s, int n)
  {
      static char *fline;
      static int linesize = 0;
      char *grown;
      int i;

      if(n < 0)
          n = 0;
      if(linesize < n+1) {
          /* keep the old buffer pointer until the new one is known good */
          grown = realloc(fline, n+1);
          if(grown == 0)
              abort();
          fline = grown;
          linesize = n+1;
      }

      /*
       * we assume the 'A'-'Z' only appear as themselves
       * in a utf encoding.
       */
      for(i = 0; i < n && s[i] != 0; i++)
          fline[i] = tolower((unsigned char)s[i]);
      fline[i] = 0;
      return fline;
  }
  /*
   * Print the command synopsis on file descriptor 2 and exit
   * with status "usage".
   */
  void
  usage(void)
  {
      static char synopsis[] =
          "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n";

      fprint(2, "%s", synopsis);
      exits("usage");
  }
  /*
   * Report that the -e expression failed to compile and exit
   * with status "bad regular expression".
   */
  void
  badexp(void)
  {
      static char complaint[] = "split: bad regular expression\n";

      fprint(2, "%s", complaint);
      exits("bad regular expression");
  }
  171. }