/* split.c - split a file into pieces by line count or by regular expression */

  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include <regexp.h>
  5. char digit[] = "0123456789";
  6. char *suffix = "";
  7. char *stem = "x";
  8. char suff[] = "aa";
  9. char name[200];
  10. Biobuf bout;
  11. Biobuf *output = &bout;
  12. extern int nextfile(void);
  13. extern int matchfile(Resub*);
  14. extern void openf(void);
  15. extern char *fold(char*,int);
  16. extern void usage(void);
  17. extern void badexp(void);
  18. void
  19. main(int argc, char *argv[])
  20. {
  21. Reprog *exp;
  22. char *pattern = 0;
  23. int n = 1000;
  24. char *line;
  25. int xflag = 0;
  26. int iflag = 0;
  27. Biobuf bin;
  28. Biobuf *b = &bin;
  29. char buf[256];
  30. ARGBEGIN {
  31. case 'l':
  32. case 'n':
  33. n=atoi(EARGF(usage()));
  34. break;
  35. case 'e':
  36. pattern = strdup(EARGF(usage()));
  37. break;
  38. case 'f':
  39. stem = strdup(EARGF(usage()));
  40. break;
  41. case 's':
  42. suffix = strdup(EARGF(usage()));
  43. break;
  44. case 'x':
  45. xflag++;
  46. break;
  47. case 'i':
  48. iflag++;
  49. break;
  50. default:
  51. usage();
  52. break;
  53. } ARGEND;
  54. if(argc < 0 || argc > 1)
  55. usage();
  56. if(argc != 0) {
  57. b = Bopen(argv[0], OREAD);
  58. if(b == nil) {
  59. fprint(2, "split: can't open %s: %r\n", argv[0]);
  60. exits("open");
  61. }
  62. } else
  63. Binit(b, 0, OREAD);
  64. if(pattern) {
  65. Resub match[2];
  66. if(!(exp = regcomp(iflag? fold(pattern, strlen(pattern)):
  67. pattern)))
  68. badexp();
  69. memset(match, 0, sizeof match);
  70. matchfile(match);
  71. while((line=Brdline(b,'\n')) != 0) {
  72. memset(match, 0, sizeof match);
  73. line[Blinelen(b)-1] = 0;
  74. if(regexec(exp, iflag? fold(line, Blinelen(b)-1): line,
  75. match, 2)) {
  76. if(matchfile(match) && xflag)
  77. continue;
  78. } else if(output == 0)
  79. nextfile(); /* at most once */
  80. Bwrite(output, line, Blinelen(b)-1);
  81. Bputc(output, '\n');
  82. }
  83. } else {
  84. int linecnt = n;
  85. while((line=Brdline(b,'\n')) != 0) {
  86. if(++linecnt > n) {
  87. nextfile();
  88. linecnt = 1;
  89. }
  90. Bwrite(output, line, Blinelen(b));
  91. }
  92. /*
  93. * in case we didn't end with a newline, tack whatever's
  94. * left onto the last file
  95. */
  96. while((n = Bread(b, buf, sizeof(buf))) > 0)
  97. Bwrite(output, buf, n);
  98. }
  99. if(b != nil)
  100. Bterm(b);
  101. exits(0);
  102. }
  103. int
  104. nextfile(void)
  105. {
  106. static int canopen = 1;
  107. if(suff[0] > 'z') {
  108. if(canopen)
  109. fprint(2, "split: file %szz not split\n",stem);
  110. canopen = 0;
  111. } else {
  112. snprint(name, sizeof name, "%s%s", stem, suff);
  113. if(++suff[1] > 'z')
  114. suff[1] = 'a', ++suff[0];
  115. openf();
  116. }
  117. return canopen;
  118. }
  119. int
  120. matchfile(Resub *match)
  121. {
  122. if(match[1].sp) {
  123. int len = match[1].ep - match[1].sp;
  124. strncpy(name, match[1].sp, len);
  125. strcpy(name+len, suffix);
  126. openf();
  127. return 1;
  128. }
  129. return nextfile();
  130. }
  131. void
  132. openf(void)
  133. {
  134. static int fd = 0;
  135. Bflush(output);
  136. Bterm(output);
  137. if(fd > 0)
  138. close(fd);
  139. fd = create(name,OWRITE,0666);
  140. if(fd < 0) {
  141. fprint(2, "grep: can't create %s: %r\n", name);
  142. exits("create");
  143. }
  144. Binit(output, fd, OWRITE);
  145. }
  146. char *
  147. fold(char *s, int n)
  148. {
  149. static char *fline;
  150. static int linesize = 0;
  151. char *t;
  152. if(linesize < n+1){
  153. fline = realloc(fline,n+1);
  154. linesize = n+1;
  155. }
  156. for(t=fline; *t++ = tolower(*s++); )
  157. continue;
  158. /* we assume the 'A'-'Z' only appear as themselves
  159. * in a utf encoding.
  160. */
  161. return fline;
  162. }
  163. void
  164. usage(void)
  165. {
  166. fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n");
  167. exits("usage");
  168. }
  169. void
  170. badexp(void)
  171. {
  172. fprint(2, "split: bad regular expression\n");
  173. exits("bad regular expression");
  174. }