list.c 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. #include <u.h>
  2. #include <libc.h>
  3. #include <regexp.h>
  4. #include <libsec.h>
  5. #include <String.h>
  6. #include <bio.h>
  7. #include "dat.h"
  8. int debug;
  9. enum
  10. {
  11. Tregexp= (1<<0), /* ~ */
  12. Texact= (1<<1), /* = */
  13. };
  14. typedef struct Pattern Pattern;
  15. struct Pattern
  16. {
  17. Pattern *next;
  18. int type;
  19. char *arg;
  20. int bang;
  21. };
String *patternpath;	/* not referenced in the code visible here; main uses a local of the same name */
Pattern *patterns;	/* head of the pattern list; set when the first pattern is added */
String *mbox;		/* not referenced in the code visible here */
/*
 * print the command synopsis on file descriptor 2 and exit with
 * status "usage"
 */
static void
usage(void)
{
	fprint(2, "usage: %s add|check patternfile [addressfile ...]\n", argv0);
	exits("usage");
}
  31. /*
  32. * convert string to lower case
  33. */
  34. static void
  35. mklower(char *p)
  36. {
  37. int c;
  38. for(; *p; p++){
  39. c = *p;
  40. if(c <= 'Z' && c >= 'A')
  41. *p = c - 'A' + 'a';
  42. }
  43. }
  44. /*
  45. * simplify an address, reduce to a domain
  46. */
  47. static String*
  48. simplify(char *addr)
  49. {
  50. int dots;
  51. char *p, *at;
  52. String *s;
  53. mklower(addr);
  54. at = strchr(addr, '@');
  55. if(at == nil){
  56. /* local address, make it an exact match */
  57. s = s_copy("=");
  58. s_append(s, addr);
  59. return s;
  60. }
  61. /* copy up to the '@' sign */
  62. at++;
  63. s = s_copy("~");
  64. for(p = addr; p < at; p++){
  65. if(strchr(".*+?(|)\\[]^$", *p))
  66. s_putc(s, '\\');
  67. s_putc(s, *p);
  68. }
  69. /* just any address matching the two most significant domain elements */
  70. s_append(s, "(.*\\.)?");
  71. p = addr+strlen(addr);
  72. dots = 0;
  73. for(; p > at; p--){
  74. if(*p != '.')
  75. continue;
  76. if(dots++ > 0){
  77. p++;
  78. break;
  79. }
  80. }
  81. for(; *p; p++){
  82. if(strchr(".*+?(|)\\[]^$", *p) != 0)
  83. s_putc(s, '\\');
  84. s_putc(s, *p);
  85. }
  86. s_terminate(s);
  87. return s;
  88. }
  89. /*
  90. * link patterns in order
  91. */
  92. static int
  93. newpattern(int type, char *arg, int bang)
  94. {
  95. Pattern *p;
  96. static Pattern *last;
  97. mklower(arg);
  98. p = mallocz(sizeof *p, 1);
  99. if(p == nil)
  100. return -1;
  101. if(type == Tregexp){
  102. p->arg = malloc(strlen(arg)+3);
  103. if(p->arg == nil){
  104. free(p);
  105. return -1;
  106. }
  107. p->arg[0] = 0;
  108. strcat(p->arg, "^");
  109. strcat(p->arg, arg);
  110. strcat(p->arg, "$");
  111. } else {
  112. p->arg = strdup(arg);
  113. if(p->arg == nil){
  114. free(p);
  115. return -1;
  116. }
  117. }
  118. p->type = type;
  119. p->bang = bang;
  120. if(last == nil)
  121. patterns = p;
  122. else
  123. last->next = p;
  124. last = p;
  125. return 0;
  126. }
  127. /*
  128. * patterns are either
  129. * ~ regular expression
  130. * = exact match string
  131. *
  132. * all comparisons are case insensitive
  133. */
  134. static int
  135. readpatterns(char *path)
  136. {
  137. Biobuf *b;
  138. char *p;
  139. char *token[2];
  140. int n;
  141. int bang;
  142. b = Bopen(path, OREAD);
  143. if(b == nil)
  144. return -1;
  145. while((p = Brdline(b, '\n')) != nil){
  146. p[Blinelen(b)-1] = 0;
  147. n = tokenize(p, token, 2);
  148. if(n == 0)
  149. continue;
  150. mklower(token[0]);
  151. p = token[0];
  152. if(*p == '!'){
  153. p++;
  154. bang = 1;
  155. } else
  156. bang = 0;
  157. if(*p == '='){
  158. if(newpattern(Texact, p+1, bang) < 0)
  159. return -1;
  160. } else if(*p == '~'){
  161. if(newpattern(Tregexp, p+1, bang) < 0)
  162. return -1;
  163. } else if(strcmp(token[0], "#include") == 0 && n == 2)
  164. readpatterns(token[1]);
  165. }
  166. Bterm(b);
  167. return 0;
  168. }
/*
 * regerror that does nothing: its argument is ignored, so no complaint
 * is made here about a bad regular expression.  checkaddr skips any
 * pattern for which regcomp returns nil.
 * NOTE(review): presumably this replaces the regexp library's default
 * error handler - confirm against regexp(2).
 */
void regerror(char*)
{
}
  173. /*
  174. * check lower case version of address agains patterns
  175. */
  176. static Pattern*
  177. checkaddr(char *arg)
  178. {
  179. Pattern *p;
  180. Reprog *rp;
  181. String *s;
  182. s = s_copy(arg);
  183. mklower(s_to_c(s));
  184. for(p = patterns; p != nil; p = p->next)
  185. switch(p->type){
  186. case Texact:
  187. if(strcmp(p->arg, s_to_c(s)) == 0){
  188. free(s);
  189. return p;
  190. }
  191. break;
  192. case Tregexp:
  193. rp = regcomp(p->arg);
  194. if(rp == nil)
  195. continue;
  196. if(regexec(rp, s_to_c(s), nil, 0)){
  197. free(rp);
  198. free(s);
  199. return p;
  200. }
  201. free(rp);
  202. break;
  203. }
  204. s_free(s);
  205. return 0;
  206. }
  207. static char*
  208. check(int argc, char **argv)
  209. {
  210. int i;
  211. Addr *a;
  212. Pattern *p;
  213. int matchedbang;
  214. matchedbang = 0;
  215. for(i = 0; i < argc; i++){
  216. a = readaddrs(argv[i], nil);
  217. for(; a != nil; a = a->next){
  218. p = checkaddr(a->val);
  219. if(p == nil)
  220. continue;
  221. if(p->bang)
  222. matchedbang = 1;
  223. else
  224. return nil;
  225. }
  226. }
  227. if(matchedbang)
  228. return "!match";
  229. else
  230. return "no match";
  231. }
  232. /*
  233. * add anything that isn't already matched, all matches are lower case
  234. */
  235. static char*
  236. add(char *pp, int argc, char **argv)
  237. {
  238. int fd, i;
  239. String *s;
  240. char *cp;
  241. Addr *a;
  242. a = nil;
  243. for(i = 0; i < argc; i++)
  244. a = readaddrs(argv[i], a);
  245. fd = open(pp, OWRITE);
  246. seek(fd, 0, 2);
  247. for(; a != nil; a = a->next){
  248. if(checkaddr(a->val))
  249. continue;
  250. s = simplify(a->val);
  251. cp = s_to_c(s);
  252. fprint(fd, "%q\t%q\n", cp, a->val);
  253. if(*cp == '=')
  254. newpattern(Texact, cp+1, 0);
  255. else if(*cp == '~')
  256. newpattern(Tregexp, cp+1, 0);
  257. s_free(s);
  258. }
  259. close(fd);
  260. return nil;
  261. }
  262. void
  263. main(int argc, char **argv)
  264. {
  265. char *patternpath;
  266. ARGBEGIN {
  267. case 'd':
  268. debug++;
  269. break;
  270. } ARGEND;
  271. quotefmtinstall();
  272. if(argc < 3)
  273. usage();
  274. patternpath = argv[1];
  275. readpatterns(patternpath);
  276. if(strcmp(argv[0], "add") == 0)
  277. exits(add(patternpath, argc-2, argv+2));
  278. else if(strcmp(argv[0], "check") == 0)
  279. exits(check(argc-2, argv+2));
  280. else
  281. usage();
  282. }