/* split.c */
  /*
   * This file is part of the UCB release of Plan 9. It is subject to the license
   * terms in the LICENSE file found in the top-level directory of this
   * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
   * part of the UCB release of Plan 9, including this file, may be copied,
   * modified, propagated, or distributed except according to the terms contained
   * in the LICENSE file.
   */
  9. #include <u.h>
  10. #include <libc.h>
  11. #include <bio.h>
  12. #include <regexp.h>
  13. char digit[] = "0123456789";
  14. char *suffix = "";
  15. char *stem = "x";
  16. char suff[] = "aa";
  17. char name[200];
  18. Biobuf bout;
  19. Biobuf *output = &bout;
  20. extern int nextfile(void);
  21. extern int matchfile(Resub*);
  22. extern void openf(void);
  23. extern char *fold(char*,int);
  24. extern void usage(void);
  25. extern void badexp(void);
  26. void
  27. main(int argc, char *argv[])
  28. {
  29. Reprog *exp;
  30. char *pattern = 0;
  31. int n = 1000;
  32. char *line;
  33. int xflag = 0;
  34. int iflag = 0;
  35. Biobuf bin;
  36. Biobuf *b = &bin;
  37. char buf[256];
  38. ARGBEGIN {
  39. case 'l':
  40. case 'n':
  41. n=atoi(EARGF(usage()));
  42. break;
  43. case 'e':
  44. pattern = strdup(EARGF(usage()));
  45. break;
  46. case 'f':
  47. stem = strdup(EARGF(usage()));
  48. break;
  49. case 's':
  50. suffix = strdup(EARGF(usage()));
  51. break;
  52. case 'x':
  53. xflag++;
  54. break;
  55. case 'i':
  56. iflag++;
  57. break;
  58. default:
  59. usage();
  60. break;
  61. } ARGEND;
  62. if(argc < 0 || argc > 1)
  63. usage();
  64. if(argc != 0) {
  65. b = Bopen(argv[0], OREAD);
  66. if(b == nil) {
  67. fprint(2, "split: can't open %s: %r\n", argv[0]);
  68. exits("open");
  69. }
  70. } else
  71. Binit(b, 0, OREAD);
  72. if(pattern) {
  73. Resub match[2];
  74. if(!(exp = regcomp(iflag? fold(pattern, strlen(pattern)):
  75. pattern)))
  76. badexp();
  77. memset(match, 0, sizeof match);
  78. matchfile(match);
  79. while((line=Brdline(b,'\n')) != 0) {
  80. memset(match, 0, sizeof match);
  81. line[Blinelen(b)-1] = 0;
  82. if(regexec(exp, iflag? fold(line, Blinelen(b)-1): line,
  83. match, 2)) {
  84. if(matchfile(match) && xflag)
  85. continue;
  86. } else if(output == 0)
  87. nextfile(); /* at most once */
  88. Bwrite(output, line, Blinelen(b)-1);
  89. Bputc(output, '\n');
  90. }
  91. } else {
  92. int linecnt = n;
  93. while((line=Brdline(b,'\n')) != 0) {
  94. if(++linecnt > n) {
  95. nextfile();
  96. linecnt = 1;
  97. }
  98. Bwrite(output, line, Blinelen(b));
  99. }
  100. /*
  101. * in case we didn't end with a newline, tack whatever's
  102. * left onto the last file
  103. */
  104. while((n = Bread(b, buf, sizeof(buf))) > 0)
  105. Bwrite(output, buf, n);
  106. }
  107. if(b != nil)
  108. Bterm(b);
  109. exits(0);
  110. }
  111. int
  112. nextfile(void)
  113. {
  114. static int canopen = 1;
  115. if(suff[0] > 'z') {
  116. if(canopen)
  117. fprint(2, "split: file %szz not split\n",stem);
  118. canopen = 0;
  119. } else {
  120. snprint(name, sizeof name, "%s%s", stem, suff);
  121. if(++suff[1] > 'z')
  122. suff[1] = 'a', ++suff[0];
  123. openf();
  124. }
  125. return canopen;
  126. }
  127. int
  128. matchfile(Resub *match)
  129. {
  130. if(match[1].sp) {
  131. int len = match[1].ep - match[1].sp;
  132. strncpy(name, match[1].sp, len);
  133. strcpy(name+len, suffix);
  134. openf();
  135. return 1;
  136. }
  137. return nextfile();
  138. }
  139. void
  140. openf(void)
  141. {
  142. static int fd = 0;
  143. Bflush(output);
  144. Bterm(output);
  145. if(fd > 0)
  146. close(fd);
  147. fd = create(name,OWRITE,0666);
  148. if(fd < 0) {
  149. fprint(2, "grep: can't create %s: %r\n", name);
  150. exits("create");
  151. }
  152. Binit(output, fd, OWRITE);
  153. }
  /*
   * Return a lower-cased copy of the string s.
   *
   * s: text to fold; at most n bytes are read, stopping early at a NUL.
   * n: length of s in bytes, not counting any terminator.
   *
   * The result lives in a static buffer that is grown as needed and
   * reused by every call, so it is only valid until the next call.
   * Only the bytes 'A'-'Z' are changed; we assume they only appear as
   * themselves in a utf encoding, so multi-byte sequences pass through
   * untouched.  Running out of memory aborts the program.
   */
  char *
  fold(char *s, int n)
  {
      static char *fline;
      static int linesize = 0;
      char *grown;
      int i;

      if(n < 0)
          n = 0;
      if(linesize < n+1){
          /* keep the old buffer until the new one is known to be good */
          grown = realloc(fline, n+1);
          if(grown == 0)
              abort();
          fline = grown;
          linesize = n+1;
      }
      /* go through unsigned char so bytes >= 0x80 are never negative */
      for(i = 0; i < n && s[i] != '\0'; i++)
          fline[i] = tolower((unsigned char)s[i]);
      fline[i] = '\0';
      return fline;
  }
  /*
   * Print the command synopsis on standard error and exit with
   * status "usage".  Does not return.
   */
  void
  usage(void)
  {
      fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n");
      exits("usage");
  }
  /*
   * Report that the -e pattern could not be compiled and exit with
   * status "bad regular expression".  Does not return.
   */
  void
  badexp(void)
  {
      fprint(2, "split: bad regular expression\n");
      exits("bad regular expression");
  }