123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315 |
- #include <u.h>
- #include <libc.h>
- #include <regexp.h>
- #include <libsec.h>
- #include <String.h>
- #include <bio.h>
- #include "dat.h"
/* debugging level; bumped once for every -d on the command line */
int debug;

/*
 * kinds of pattern, named after the character that introduces
 * them in a pattern file
 */
enum
{
	Tregexp	= 0x1,	/* ~ regular expression */
	Texact	= 0x2,	/* = exact string */
};
/*
 * one entry of the pattern list; entries are chained in the
 * order in which they were added
 */
typedef struct Pattern
{
	struct Pattern	*next;	/* following entry, nil at the end of the list */
	int	type;		/* Tregexp or Texact */
	char	*arg;		/* lower case pattern text */
	int	bang;		/* non-zero if the pattern was prefixed with '!' */
} Pattern;
- String *patternpath;
- Pattern *patterns;
- String *mbox;
- static void
- usage(void)
- {
- fprint(2, "usage: %s add|check patternfile [addressfile ...]\n", argv0);
- exits("usage");
- }
/*
 * fold the ASCII upper case letters of a NUL terminated
 * string to lower case, in place; all other bytes are
 * left untouched
 */
static void
mklower(char *p)
{
	char *s;

	for(s = p; *s != '\0'; s++)
		if('A' <= *s && *s <= 'Z')
			*s += 'a' - 'A';
}
- /*
- * simplify an address, reduce to a domain
- */
- static String*
- simplify(char *addr)
- {
- int dots;
- char *p, *at;
- String *s;
- mklower(addr);
- at = strchr(addr, '@');
- if(at == nil){
- /* local address, make it an exact match */
- s = s_copy("=");
- s_append(s, addr);
- return s;
- }
- /* copy up to the '@' sign */
- at++;
- s = s_copy("~");
- for(p = addr; p < at; p++){
- if(strchr(".*+?(|)\\[]^$", *p))
- s_putc(s, '\\');
- s_putc(s, *p);
- }
- /* just any address matching the two most significant domain elements */
- s_append(s, "(.*\\.)?");
- p = addr+strlen(addr);
- dots = 0;
- for(; p > at; p--){
- if(*p != '.')
- continue;
- if(dots++ > 0){
- p++;
- break;
- }
- }
- for(; *p; p++){
- if(strchr(".*+?(|)\\[]^$", *p) != 0)
- s_putc(s, '\\');
- s_putc(s, *p);
- }
- s_terminate(s);
- return s;
- }
- /*
- * link patterns in order
- */
- static int
- newpattern(int type, char *arg, int bang)
- {
- Pattern *p;
- static Pattern *last;
- mklower(arg);
- p = mallocz(sizeof *p, 1);
- if(p == nil)
- return -1;
- if(type == Tregexp){
- p->arg = malloc(strlen(arg)+3);
- if(p->arg == nil){
- free(p);
- return -1;
- }
- p->arg[0] = 0;
- strcat(p->arg, "^");
- strcat(p->arg, arg);
- strcat(p->arg, "$");
- } else {
- p->arg = strdup(arg);
- if(p->arg == nil){
- free(p);
- return -1;
- }
- }
- p->type = type;
- p->bang = bang;
- if(last == nil)
- patterns = p;
- else
- last->next = p;
- last = p;
- return 0;
- }
- /*
- * patterns are either
- * ~ regular expression
- * = exact match string
- *
- * all comparisons are case insensitive
- */
- static int
- readpatterns(char *path)
- {
- Biobuf *b;
- char *p;
- char *token[2];
- int n;
- int bang;
- b = Bopen(path, OREAD);
- if(b == nil)
- return -1;
- while((p = Brdline(b, '\n')) != nil){
- p[Blinelen(b)-1] = 0;
- n = tokenize(p, token, 2);
- if(n == 0)
- continue;
- mklower(token[0]);
- p = token[0];
- if(*p == '!'){
- p++;
- bang = 1;
- } else
- bang = 0;
- if(*p == '='){
- if(newpattern(Texact, p+1, bang) < 0)
- return -1;
- } else if(*p == '~'){
- if(newpattern(Tregexp, p+1, bang) < 0)
- return -1;
- } else if(strcmp(token[0], "#include") == 0 && n == 2)
- readpatterns(token[1]);
- }
- Bterm(b);
- return 0;
- }
/*
 * deliberately empty error hook for the regular expression
 * library.  NOTE(review): presumably this replaces the library's
 * default regerror so that a malformed pattern is not fatal --
 * confirm against regexp(2).  checkaddr skips any pattern for
 * which regcomp returns nil.
 */
void
regerror(char *msg)
{
	USED(msg);
}
- /*
- * check lower case version of address agains patterns
- */
- static Pattern*
- checkaddr(char *arg)
- {
- Pattern *p;
- Reprog *rp;
- String *s;
- s = s_copy(arg);
- mklower(s_to_c(s));
- for(p = patterns; p != nil; p = p->next)
- switch(p->type){
- case Texact:
- if(strcmp(p->arg, s_to_c(s)) == 0){
- free(s);
- return p;
- }
- break;
- case Tregexp:
- rp = regcomp(p->arg);
- if(rp == nil)
- continue;
- if(regexec(rp, s_to_c(s), nil, 0)){
- free(rp);
- free(s);
- return p;
- }
- free(rp);
- break;
- }
- s_free(s);
- return 0;
- }
- static char*
- check(int argc, char **argv)
- {
- int i;
- Addr *a;
- Pattern *p;
- int matchedbang;
- matchedbang = 0;
- for(i = 0; i < argc; i++){
- a = readaddrs(argv[i], nil);
- for(; a != nil; a = a->next){
- p = checkaddr(a->val);
- if(p == nil)
- continue;
- if(p->bang)
- matchedbang = 1;
- else
- return nil;
- }
- }
- if(matchedbang)
- return "!match";
- else
- return "no match";
- }
- /*
- * add anything that isn't already matched, all matches are lower case
- */
- static char*
- add(char *pp, int argc, char **argv)
- {
- int fd, i;
- String *s;
- char *cp;
- Addr *a;
- a = nil;
- for(i = 0; i < argc; i++)
- a = readaddrs(argv[i], a);
- fd = open(pp, OWRITE);
- seek(fd, 0, 2);
- for(; a != nil; a = a->next){
- if(checkaddr(a->val))
- continue;
- s = simplify(a->val);
- cp = s_to_c(s);
- fprint(fd, "%q\t%q\n", cp, a->val);
- if(*cp == '=')
- newpattern(Texact, cp+1, 0);
- else if(*cp == '~')
- newpattern(Tregexp, cp+1, 0);
- s_free(s);
- }
- close(fd);
- return nil;
- }
- void
- main(int argc, char **argv)
- {
- char *patternpath;
- ARGBEGIN {
- case 'd':
- debug++;
- break;
- } ARGEND;
- quotefmtinstall();
- if(argc < 3)
- usage();
- patternpath = argv[1];
- readpatterns(patternpath);
- if(strcmp(argv[0], "add") == 0)
- exits(add(patternpath, argc-2, argv+2));
- else if(strcmp(argv[0], "check") == 0)
- exits(check(argc-2, argv+2));
- else
- usage();
- }
|