/* split.c - split a file into pieces, by line count or by regular expression */
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <bio.h>
  4. #include <regexp.h>
  5. char digit[] = "0123456789";
  6. char *suffix = "";
  7. char *stem = "x";
  8. char suff[] = "aa";
  9. char name[200];
  10. Biobuf bout;
  11. Biobuf *output = &bout;
  12. extern int nextfile(void);
  13. extern int matchfile(Resub*);
  14. extern void openf(void);
  15. extern char *fold(char*,int);
  16. extern void usage(void);
  17. extern void badexp(void);
  18. void
  19. main(int argc, char *argv[])
  20. {
  21. Reprog *exp;
  22. char *pattern = 0;
  23. int n = 1000;
  24. char *line;
  25. int xflag = 0;
  26. int iflag = 0;
  27. Biobuf bin;
  28. Biobuf *b = &bin;
  29. char buf[256];
  30. ARGBEGIN {
  31. case 'l':
  32. case 'n':
  33. n=atoi(EARGF(usage()));
  34. break;
  35. case 'e':
  36. pattern = strdup(EARGF(usage()));
  37. break;
  38. case 'f':
  39. stem = strdup(EARGF(usage()));
  40. break;
  41. case 's':
  42. suffix = strdup(EARGF(usage()));
  43. break;
  44. case 'x':
  45. xflag++;
  46. break;
  47. case 'i':
  48. iflag++;
  49. break;
  50. default:
  51. usage();
  52. break;
  53. } ARGEND;
  54. if(argc < 0 || argc > 1)
  55. usage();
  56. if(argc != 0) {
  57. b = Bopen(argv[0], OREAD);
  58. if(b == nil) {
  59. fprint(2, "split: can't open %s: %r\n", argv[0]);
  60. exits("open");
  61. }
  62. } else
  63. Binit(b, 0, OREAD);
  64. if(pattern) {
  65. if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern)))
  66. badexp();
  67. while((line=Brdline(b,'\n')) != 0) {
  68. Resub match[2];
  69. memset(match, 0, sizeof match);
  70. line[Blinelen(b)-1] = 0;
  71. if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) {
  72. if(matchfile(match) && xflag)
  73. continue;
  74. } else if(output == 0)
  75. nextfile(); /* at most once */
  76. Bwrite(output, line, Blinelen(b)-1);
  77. Bputc(output, '\n');
  78. }
  79. } else {
  80. int linecnt = n;
  81. while((line=Brdline(b,'\n')) != 0) {
  82. if(++linecnt > n) {
  83. nextfile();
  84. linecnt = 1;
  85. }
  86. Bwrite(output, line, Blinelen(b));
  87. }
  88. /*
  89. * in case we didn't end with a newline, tack whatever's
  90. * left onto the last file
  91. */
  92. while((n = Bread(b, buf, sizeof(buf))) > 0)
  93. Bwrite(output, buf, n);
  94. }
  95. if(b != nil)
  96. Bterm(b);
  97. exits(0);
  98. }
  99. int
  100. nextfile(void)
  101. {
  102. static canopen = 1;
  103. if(suff[0] > 'z') {
  104. if(canopen)
  105. fprint(2, "split: file %szz not split\n",stem);
  106. canopen = 0;
  107. } else {
  108. snprint(name, sizeof name, "%s%s", stem, suff);
  109. if(++suff[1] > 'z')
  110. suff[1] = 'a', ++suff[0];
  111. openf();
  112. }
  113. return canopen;
  114. }
  115. int
  116. matchfile(Resub *match)
  117. {
  118. if(match[1].sp) {
  119. int len = match[1].ep - match[1].sp;
  120. strncpy(name, match[1].sp, len);
  121. strcpy(name+len, suffix);
  122. openf();
  123. return 1;
  124. }
  125. return nextfile();
  126. }
  127. void
  128. openf(void)
  129. {
  130. static int fd = 0;
  131. Bflush(output);
  132. Bterm(output);
  133. if(fd > 0)
  134. close(fd);
  135. fd = create(name,OWRITE,0666);
  136. if(fd < 0) {
  137. fprint(2, "grep: can't create %s: %r\n", name);
  138. exits("create");
  139. }
  140. Binit(output, fd, OWRITE);
  141. }
  /*
   * Return a lower-cased copy of the first n bytes of s (stopping
   * early at a NUL), as a NUL-terminated string.
   *
   * The result lives in a static buffer that is reused, and possibly
   * moved, by the next call; the caller must not free it.  Aborts if
   * the buffer cannot be grown.
   */
  char *
  fold(char *s, int n)
  {
  	static char *fline;
  	static int linesize = 0;
  	char *t, *grown;
  	int i;

  	if(n < 0)
  		n = 0;
  	if(linesize < n+1){
  		/* keep the old buffer until the new one is known to exist */
  		grown = realloc(fline, n+1);
  		if(grown == 0)
  			abort();
  		fline = grown;
  		linesize = n+1;
  	}
  	/*
  	 * we assume the 'A'-'Z' only appear as themselves
  	 * in a utf encoding.  Bytes go through unsigned char so that
  	 * values above 0x7f are never handed to tolower as negatives,
  	 * and the copy is limited to n so it cannot outrun the buffer.
  	 */
  	t = fline;
  	for(i = 0; i < n && s[i] != 0; i++)
  		*t++ = tolower((unsigned char)s[i]);
  	*t = 0;
  	return fline;
  }
  /*
   * Print the command synopsis on standard error and exit with
   * status "usage".
   */
  void
  usage(void)
  {
  	static char synopsis[] =
  		"usage: split [-n num] [-e exp] [-f stem] "
  		"[-s suff] [-x] [-i] [file]\n";

  	fprint(2, "%s", synopsis);
  	exits("usage");
  }
  /*
   * Complain on standard error that the -e expression did not
   * compile, then exit with status "bad regular expression".
   */
  void
  badexp(void)
  {
  	char *why = "bad regular expression";

  	fprint(2, "split: %s\n", why);
  	exits(why);
  }