/*
 *  this is a filter that changes mime types and names of
 *  suspect executable attachments.
 */
#include "common.h"
#include <ctype.h>	/* isspace, tolower */

Biobuf in;	/* the message being filtered (fd 0) */
Biobuf out;	/* the filtered message (fd 1, or the save file inside save()) */

typedef struct Mtype Mtype;
typedef struct Hdef Hdef;
typedef struct Hline Hline;
typedef struct Part Part;

static int	badfile(char *name);
static int	badtype(char *type);
static void	ctype(Part*, Hdef*, char*);
static void	cencoding(Part*, Hdef*, char*);
static void	cdisposition(Part*, Hdef*, char*);
static int	decquoted(char *out, char *in, char *e);
static char*	getstring(char *p, String *s, int dolower);
static void	init_hdefs(void);
static int	isattribute(char **pp, char *attr);
static int	latin1toutf(char *out, char *in, char *e);
static String*	mkboundary(void);
static Part*	part(Part *pp);
static Part*	passbody(Part *p, int dobound);
static void	passnotheader(void);
static void	passunixheader(void);
static Part*	problemchild(Part *p);
static void	readheader(Part *p);
static Hline*	readhl(void);
static void	readmtypes(void);
static void	save(Part *p);
static void	setfilename(Part *p, char *name);
static char*	skiptosemi(char *p);
static char*	skipwhite(char *p);
static String*	tokenconvert(String *t);
static void	writeheader(Part *p);

enum
{
	/* encodings */
	Enone=	0,
	Ebase64,
	Equoted,

	/* disposition possibilities */
	Dnone=	0,
	Dinline,
	Dfile,
	Dignore,

	PAD64=	'=',
};

/*
 *  a message part; either the whole message or a subpart
 */
struct Part
{
	Part	*pp;		/* parent part */
	Hline	*hl;		/* linked list of header lines */
	int	disposition;
	int	encoding;
	int	badfile;	/* result of badfile() on the file name */
	int	badtype;	/* set when badtype() rejects the content type */
	String	*boundary;	/* boundary for multiparts, with leading "--" */
	int	blen;		/* length of boundary */
	String	*charset;	/* character set */
	String	*type;		/* content type */
	String	*filename;	/* file name from name= or filename= */
};

/*
 *  a (multi)line header
 */
struct Hline
{
	Hline	*next;
	String	*s;
};

/*
 *  header definitions for parsing
 */
struct Hdef
{
	char	*type;				/* lower case header name including ':' */
	void	(*f)(Part*, Hdef*, char*);	/* parser for this header */
	int	len;				/* strlen(type), filled in by init_hdefs */
};

Hdef hdefs[] =
{
	{ "content-type:", ctype, },
	{ "content-transfer-encoding:", cencoding, },
	{ "content-disposition:", cdisposition, },
	{ 0, },
};

/*
 *  acceptable content types and their extensions
 */
struct Mtype
{
	Mtype	*next;
	char	*ext;		/* extension */
	char	*gtype;		/* generic content type */
	char	*stype;		/* specific content type */
	char	class;
};
Mtype *mtypes;

int justreject;		/* -r flag */
char *savefile;		/* -s file: where refused messages are appended */

/*
 *  usage: vf [-r] [-s savefile]
 *  filters the message on fd 0 to fd 1.
 */
void
main(int argc, char **argv)
{
	ARGBEGIN{
	case 'r':
		justreject = 1;
		break;
	case 's':
		savefile = ARGF();
		if(savefile == nil)
			exits("usage");
		break;
	}ARGEND;

	Binit(&in, 0, OREAD);
	Binit(&out, 1, OWRITE);

	init_hdefs();
	readmtypes();

	/* pass through our standard 'From ' line */
	passunixheader();

	/* parse with the top level part */
	part(nil);

	exits(0);
}

/*
 *  parse a part; returns the ancestor whose boundary terminated
 *  this part or nil on EOF.
 */
static Part*
part(Part *pp)
{
	Part *p, *np;

	p = mallocz(sizeof *p, 1);
	if(p == nil){
		/* fail with a diagnosis rather than a nil dereference */
		fprint(2, "vf: out of memory\n");
		exits("out of memory");
	}
	p->pp = pp;
	readheader(p);

	if(p->boundary != nil){
		/* the format of a multipart part is always:
		 *   header
		 *   null or ignored body
		 *   boundary
		 *   header
		 *   body
		 *   boundary
		 *   ...
		 */
		writeheader(p);
		np = passbody(p, 1);
		if(np != p)
			return np;
		for(;;){
			np = part(p);
			if(np != p)
				return np;
		}
	} else {
		/* no boundary */
		/* may still be multipart if this is a forwarded message */
		if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){
			/* the format of forwarded message is:
			 *   header
			 *   <blank line(s)>
			 *   header
			 *   body
			 */
			writeheader(p);
			passnotheader();
			return part(p);
		} else {
			/*
			 *  This is the meat.  This may be an executable.
			 *  if so, wrap it and change its type
			 */
			if(p->badtype || p->badfile){
				if(p->badfile == 2){
					/* badfile() returned 2: refuse the whole message */
					if(savefile != nil)
						save(p);
					syslog(0, "vf", "vf rejected %s %s", p->type?s_to_c(p->type):"?",
						p->filename?s_to_c(p->filename):"?");
					fprint(2, "The mail contained an executable attachment.\n");
					fprint(2, "We refuse all mail containing such.\n");
					postnote(PNGROUP, getpid(), "mail refused: we don't accept executable attachments");
					exits("mail refused: we don't accept executable attachments");
				}
				return problemchild(p);
			} else {
				writeheader(p);
				return passbody(p, 1);
			}
		}
	}
}

/*
 *  read and parse a complete header.  every header line is appended
 *  to p->hl; lines matching an entry in hdefs are also handed to
 *  that entry's parser.
 */
static void
readheader(Part *p)
{
	Hline *hl, **l;
	Hdef *hd;

	l = &p->hl;
	for(;;){
		hl = readhl();
		if(hl == nil)
			break;
		*l = hl;
		l = &hl->next;

		for(hd = hdefs; hd->type != nil; hd++){
			if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){
				(*hd->f)(p, hd, s_to_c(hl->s));
				break;
			}
		}
	}
}

/*
 *  read a possibly multiline header line.  returns nil at EOF or
 *  when the next line contains no ':'; in the latter case the line
 *  is pushed back onto the input.
 */
static Hline*
readhl(void)
{
	Hline *hl;
	String *s;
	char *p;
	int n;

	p = Brdline(&in, '\n');
	if(p == nil)
		return nil;
	n = Blinelen(&in);
	if(memchr(p, ':', n) == nil){
		Bseek(&in, -n, 1);
		return nil;
	}
	s = s_nappend(s_new(), p, n);
	for(;;){
		p = Brdline(&in, '\n');
		if(p == nil)
			break;
		n = Blinelen(&in);
		/* continuation lines start with a blank or tab */
		if(*p != ' ' && *p != '\t'){
			Bseek(&in, -n, 1);
			break;
		}
		s = s_nappend(s, p, n);
	}
	hl = malloc(sizeof *hl);
	if(hl == nil){
		fprint(2, "vf: out of memory\n");
		exits("out of memory");
	}
	hl->s = s;
	hl->next = nil;
	return hl;
}

/*
 *  write out a complete header, freeing the header lines as we go
 */
static void
writeheader(Part *p)
{
	Hline *hl, *next;

	for(hl = p->hl; hl != nil; hl = next){
		Bprint(&out, "%s", s_to_c(hl->s));
		s_free(hl->s);
		next = hl->next;
		free(hl);
	}
	p->hl = nil;
}

/*
 *  pass a body through.  return if we hit one of our ancestors'
 *  boundaries or EOF.  if we hit a boundary, return a pointer to
 *  that ancestor.  if we hit EOF, return nil.
 *
 *  if dobound is set the boundary line is copied to the output,
 *  otherwise it is pushed back onto the input.
 *
 *  NOTE(review): Brdline presumably also returns nil for a line longer
 *  than the Biobuf buffer, which would be treated as EOF here - confirm
 *  against bio(2).
 */
static Part*
passbody(Part *p, int dobound)
{
	Part *pp;
	char *cp;
	int n;

	for(;;){
		cp = Brdline(&in, '\n');
		if(cp == nil)
			return nil;
		n = Blinelen(&in);
		for(pp = p; pp != nil; pp = pp->pp)
			/*
			 *  the line is not NUL terminated, so only compare when
			 *  it is at least as long as the boundary.
			 */
			if(pp->boundary != nil
			&& n >= pp->blen
			&& strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){
				if(dobound)
					Bwrite(&out, cp, n);
				else
					Bseek(&in, -n, 1);
				return pp;
			}
		Bwrite(&out, cp, n);
	}
	return nil;
}

/*
 *  save the message somewhere.  out is closed and re-opened on
 *  savefile, so nothing more can be written to fd 1 through out
 *  after this returns.
 */
static void
save(Part *p)
{
	int fd;
	char *cp;

	Bterm(&out);
	memset(&out, 0, sizeof(out));

	fd = open(savefile, OWRITE);
	if(fd < 0)
		return;
	seek(fd, 0, 2);		/* append */
	Binit(&out, fd, OWRITE);
	cp = ctime(time(0));
	cp[28] = 0;		/* cut the time string at 28 characters */
	Bprint(&out, "From virusfilter %s\n", cp);
	writeheader(p);
	passbody(p, 1);
	Bprint(&out, "\n");
	Bterm(&out);
	close(fd);
}

/*
 *  emit a multipart Part that explains the problem, followed by the
 *  original body relabelled as application/octet-stream with
 *  ".suspect" appended to its file name.  returns whatever
 *  passbody() returned for the body.
 */
static Part*
problemchild(Part *p)
{
	Part *np;
	Hline *hl;
	String *boundary;
	char *cp;

	syslog(0, "mail", "vf wrapped %s %s", p->type?s_to_c(p->type):"?",
		p->filename?s_to_c(p->filename):"?");

	boundary = mkboundary();

	/* print out non-mime headers */
	for(hl = p->hl; hl != nil; hl = hl->next)
		if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0)
			Bprint(&out, "%s", s_to_c(hl->s));

	/* add in our own multipart headers and message */
	Bprint(&out, "Content-Type: multipart/mixed;\n");
	Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary));
	Bprint(&out, "Content-Disposition: inline\n");
	Bprint(&out, "\n");
	Bprint(&out, "This is a multi-part message in MIME format.\n");
	Bprint(&out, "--%s\n", s_to_c(boundary));
	Bprint(&out, "Content-Disposition: inline\n");
	Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n");
	Bprint(&out, "Content-Transfer-Encoding: 7bit\n");
	Bprint(&out, "\n");
	Bprint(&out, "from postmaster@%s:\n", sysname());
	Bprint(&out, "The following attachment had content that we can't\n");
	Bprint(&out, "prove to be harmless.  To avoid possible automatic\n");
	Bprint(&out, "execution, we changed the content headers.\n");
	Bprint(&out, "The original header was:\n\n");

	/* print out original header lines */
	for(hl = p->hl; hl != nil; hl = hl->next)
		if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0)
			Bprint(&out, "\t%s", s_to_c(hl->s));
	Bprint(&out, "--%s\n", s_to_c(boundary));

	/* change file name */
	if(p->filename)
		s_append(p->filename, ".suspect");
	else
		p->filename = s_copy("file.suspect");

	/* print out new header */
	Bprint(&out, "Content-Type: application/octet-stream\n");
	Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename));
	switch(p->encoding){
	case Enone:
		break;
	case Ebase64:
		Bprint(&out, "Content-Transfer-Encoding: base64\n");
		break;
	case Equoted:
		Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n");
		break;
	}

	/* pass the body; the terminating boundary is left on the input */
	np = passbody(p, 0);

	/* add the new boundary and the original terminator */
	Bprint(&out, "--%s--\n", s_to_c(boundary));
	if(np && np->boundary){
		cp = Brdline(&in, '\n');
		if(cp != nil)	/* don't hand Bwrite a nil buffer */
			Bwrite(&out, cp, Blinelen(&in));
	}

	return np;
}

/*
 *  if *pp starts (ignoring case) with attr followed by optional
 *  blanks, '=' and optional blanks, advance *pp past them and
 *  return 1.  otherwise leave *pp alone and return 0.
 */
static int
isattribute(char **pp, char *attr)
{
	char *p;
	int n;

	n = strlen(attr);
	p = *pp;
	if(cistrncmp(p, attr, n) != 0)
		return 0;
	p += n;
	while(*p == ' ')
		p++;
	if(*p++ != '=')
		return 0;
	while(*p == ' ')
		p++;
	*pp = p;
	return 1;
}

/*
 *  parse content type header
 */
static void
ctype(Part *p, Hdef *h, char *cp)
{
	String *s;

	cp += h->len;
	cp = skipwhite(cp);

	p->type = s_new();
	cp = getstring(cp, p->type, 1);
	if(badtype(s_to_c(p->type)))
		p->badtype = 1;

	while(*cp){
		if(isattribute(&cp, "boundary")){
			s = s_new();
			cp = getstring(cp, s, 0);
			/* store the boundary with the "--" it has in the body */
			p->boundary = s_reset(p->boundary);
			s_append(p->boundary, "--");
			s_append(p->boundary, s_to_c(s));
			p->blen = s_len(p->boundary);
			s_free(s);
		} else if(cistrncmp(cp, "multipart", 9) == 0){
			/*
			 *  the first unbounded part of a multipart message,
			 *  the preamble, is not displayed or saved
			 */
		} else if(isattribute(&cp, "name")){
			setfilename(p, cp);
		} else if(isattribute(&cp, "charset")){
			if(p->charset == nil)
				p->charset = s_new();
			cp = getstring(cp, s_reset(p->charset), 0);
		}

		cp = skiptosemi(cp);
	}
}

/*
 *  parse content encoding header
 */
static void
cencoding(Part *m, Hdef *h, char *p)
{
	p += h->len;
	p = skipwhite(p);
	if(cistrncmp(p, "base64", 6) == 0)
		m->encoding = Ebase64;
	else if(cistrncmp(p, "quoted-printable", 16) == 0)
		m->encoding = Equoted;
}

/*
 *  parse content disposition header
 */
static void
cdisposition(Part *p, Hdef *h, char *cp)
{
	cp += h->len;
	cp = skipwhite(cp);
	while(*cp){
		if(cistrncmp(cp, "inline", 6) == 0){
			p->disposition = Dinline;
		} else if(cistrncmp(cp, "attachment", 10) == 0){
			p->disposition = Dfile;
		} else if(cistrncmp(cp, "filename=", 9) == 0){
			cp += 9;
			setfilename(p, cp);
		}
		cp = skiptosemi(cp);
	}
}

/*
 *  record the part's file name, decoded by tokenconvert(), and
 *  classify it with badfile().
 */
static void
setfilename(Part *p, char *name)
{
	String *raw;

	if(p->filename == nil)
		p->filename = s_new();
	getstring(name, s_reset(p->filename), 0);

	/*
	 *  every return path of tokenconvert yields a String from
	 *  s_copy, s_new or s_clone, so release our reference to the
	 *  undecoded one instead of leaking it.
	 */
	raw = p->filename;
	p->filename = tokenconvert(raw);
	s_free(raw);
	p->badfile = badfile(s_to_c(p->filename));
}

/*
 *  return a pointer to the first non white space character at or after p
 */
static char*
skipwhite(char *p)
{
	while(isspace(*p))
		p++;
	return p;
}

/*
 *  skip to just past the next run of ';' and white space,
 *  or to the end of the string if there is no ';'
 */
static char*
skiptosemi(char *p)
{
	while(*p && *p != ';')
		p++;
	while(*p == ';' || isspace(*p))
		p++;
	return p;
}

/*
 *  parse a possibly "'d string from a header.  A
 *  ';' terminates the string.
 *
 *  the result replaces the contents of s, lower cased if dolower is
 *  set.  returns a pointer just past what was consumed.
 */
static char*
getstring(char *p, String *s, int dolower)
{
	s = s_reset(s);
	p = skipwhite(p);
	if(*p == '"'){
		/* quoted: everything up to the closing quote */
		p++;
		for(;*p && *p != '"'; p++)
			if(dolower)
				s_putc(s, tolower(*p));
			else
				s_putc(s, *p);
		if(*p == '"')
			p++;
		s_terminate(s);

		return p;
	}

	/* unquoted: up to white space or ';' */
	for(; *p && !isspace(*p) && *p != ';'; p++)
		if(dolower)
			s_putc(s, tolower(*p));
		else
			s_putc(s, *p);
	s_terminate(s);

	return p;
}

/*
 *  fill in the len field of every hdefs entry (once)
 */
static void
init_hdefs(void)
{
	Hdef *hd;
	static int already;

	if(already)
		return;
	already = 1;

	for(hd = hdefs; hd->type != nil; hd++)
		hd->len = strlen(hd->type);
}

/*
 *  create a new boundary: "upas-" followed by random lower case letters
 */
static String*
mkboundary(void)
{
	char buf[32];
	int i;
	static int already;

	if(already == 0){
		srand((time(0)<<16)|getpid());
		already = 1;
	}
	strcpy(buf, "upas-");
	for(i = 5; i < sizeof(buf)-1; i++)
		buf[i] = 'a' + nrand(26);
	buf[i] = 0;
	return s_copy(buf);
}

/*
 *  skip blank lines till header.  the blank lines are copied to
 *  the output; the first non-blank line is pushed back.
 */
static void
passnotheader(void)
{
	char *cp;
	int i, n;

	while((cp = Brdline(&in, '\n')) != nil){
		n = Blinelen(&in);
		for(i = 0; i < n-1; i++)
			if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){
				Bseek(&in, -n, 1);
				return;
			}
		Bwrite(&out, cp, n);
	}
}

/*
 *  pass unix header lines: copy leading lines that start with
 *  "From " and push back the first line that doesn't.
 */
static void
passunixheader(void)
{
	char *p;
	int n;

	while((p = Brdline(&in, '\n')) != nil){
		n = Blinelen(&in);
		if(strncmp(p, "From ", 5) != 0){
			Bseek(&in, -n, 1);
			break;
		}
		Bwrite(&out, p, n);
	}
}

/*
 *  Read mime types from /sys/lib/mimetype into the mtypes list.
 *  lines starting with '#' and lines with fewer than 5 fields are
 *  skipped.  fields used: 0 extension, 1 generic type, 2 specific
 *  type, 4 class (first character).  if the file can't be opened
 *  the list stays empty.
 */
static void
readmtypes(void)
{
	Biobuf *b;
	char *p;
	char *f[6];
	Mtype *m;
	Mtype **l;

	b = Bopen("/sys/lib/mimetype", OREAD);
	if(b == nil)
		return;

	l = &mtypes;
	while((p = Brdline(b, '\n')) != nil){
		if(*p == '#')
			continue;
		p[Blinelen(b)-1] = 0;	/* replace the newline */
		if(tokenize(p, f, nelem(f)) < 5)
			continue;
		m = mallocz(sizeof *m, 1);
		if(m == nil)
			goto err;
		m->ext = strdup(f[0]);
		if(m->ext == 0)
			goto err;
		m->gtype = strdup(f[1]);
		if(m->gtype == 0)
			goto err;
		m->stype = strdup(f[2]);
		if(m->stype == 0)
			goto err;
		m->class = *f[4];
		*l = m;
		l = &(m->next);
	}
	Bterm(b);
	return;
err:
	/* m is not yet on the list; entries already linked are kept */
	if(m != nil){
		free(m->ext);
		free(m->gtype);
		free(m->stype);
		free(m);
	}
	Bterm(b);	/* close the file on every error path */
}
/* * if the class is 'm' or 'y', accept it * if the class is 'p' check a previous extension * otherwise, filename is bad */ static int badfile(char *name) { char *p; Mtype *m; int rv; p = strrchr(name, '.'); if(p == nil) return 0; for(m = mtypes; m != nil; m = m->next) if(cistrcmp(p, m->ext) == 0){ switch(m->class){ case 'm': case 'y': return 0; case 'p': *p = 0; rv = badfile(name); *p = '.'; return rv; case 'r': return 2; } } if(justreject) return 0; return 1; } /* * if the class is 'm' or 'y' or 'p', accept it * otherwise, filename is bad */ static int badtype(char *type) { Mtype *m; char *s, *fix; int rv = 1; if(justreject) return 0; fix = s = strchr(type, '/'); if(s != nil) *s++ = 0; else s = "-"; for(m = mtypes; m != nil; m = m->next){ if(cistrcmp(type, m->gtype) != 0) continue; if(cistrcmp(s, m->stype) != 0) continue; switch(m->class){ case 'y': case 'p': case 'm': rv = 0; break; } break; } if(fix != nil) *fix = '/'; return rv; } /* rfc2047 non-ascii */ typedef struct Charset Charset; struct Charset { char *name; int len; int convert; } charsets[] = { { "us-ascii", 8, 1, }, { "utf-8", 5, 0, }, { "iso-8859-1", 10, 1, }, }; /* * convert to UTF if need be */ static String* tokenconvert(String *t) { String *s; char decoded[1024]; char utfbuf[2*1024]; int i, len; char *e; char *token; token = s_to_c(t); len = s_len(t); if(token[0] != '=' || token[1] != '?' || token[len-2] != '?' 
|| token[len-1] != '=') goto err; e = token+len-2; token += 2; // bail if we don't understand the character set for(i = 0; i < nelem(charsets); i++) if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0) if(token[charsets[i].len] == '?'){ token += charsets[i].len + 1; break; } if(i >= nelem(charsets)) goto err; // bail if it doesn't fit if(strlen(token) > sizeof(decoded)-1) goto err; // bail if we don't understand the encoding if(cistrncmp(token, "b?", 2) == 0){ token += 2; len = dec64((uchar*)decoded, sizeof(decoded), token, e-token); decoded[len] = 0; } else if(cistrncmp(token, "q?", 2) == 0){ token += 2; len = decquoted(decoded, token, e); if(len > 0 && decoded[len-1] == '\n') len--; decoded[len] = 0; } else goto err; s = nil; switch(charsets[i].convert){ case 0: s = s_copy(decoded); break; case 1: s = s_new(); latin1toutf(utfbuf, decoded, decoded+len); s_append(s, utfbuf); break; } return s; err: return s_clone(t); } /* * decode quoted */ enum { Self= 1, Hex= 2, }; uchar tableqp[256]; static void initquoted(void) { int c; memset(tableqp, 0, 256); for(c = ' '; c <= '<'; c++) tableqp[c] = Self; for(c = '>'; c <= '~'; c++) tableqp[c] = Self; tableqp['\t'] = Self; tableqp['='] = Hex; } static int hex2int(int x) { if(x >= '0' && x <= '9') return x - '0'; if(x >= 'A' && x <= 'F') return (x - 'A') + 10; if(x >= 'a' && x <= 'f') return (x - 'a') + 10; return 0; } static char* decquotedline(char *out, char *in, char *e) { int c, soft; /* dump trailing white space */ while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n')) e--; /* trailing '=' means no newline */ if(*e == '='){ soft = 1; e--; } else soft = 0; while(in <= e){ c = (*in++) & 0xff; switch(tableqp[c]){ case Self: *out++ = c; break; case Hex: c = hex2int(*in++)<<4; c |= hex2int(*in++); *out++ = c; break; } } if(!soft) *out++ = '\n'; *out = 0; return out; } static int decquoted(char *out, char *in, char *e) { char *p, *nl; if(tableqp[' '] == 0) initquoted(); p = out; while((nl = strchr(in, 
'\n')) != nil && nl < e){ p = decquotedline(p, in, nl); in = nl + 1; } if(in < e) p = decquotedline(p, in, e-1); // make sure we end with a new line if(*(p-1) != '\n'){ *p++ = '\n'; *p = 0; } return p - out; } /* translate latin1 directly since it fits neatly in utf */ static int latin1toutf(char *out, char *in, char *e) { Rune r; char *p; p = out; for(; in < e; in++){ r = (*in) & 0xff; p += runetochar(p, &r); } *p = 0; return p - out; }