/*
 * this is a filter that changes mime types and names of
 * suspect executable attachments.
 */
#include "common.h"
#include <ctype.h>
- Biobuf in;
- Biobuf out;
- typedef struct Mtype Mtype;
- typedef struct Hdef Hdef;
- typedef struct Hline Hline;
- typedef struct Part Part;
- static int badfile(char *name);
- static int badtype(char *type);
- static void ctype(Part*, Hdef*, char*);
- static void cencoding(Part*, Hdef*, char*);
- static void cdisposition(Part*, Hdef*, char*);
- static int decquoted(char *out, char *in, char *e);
- static char* getstring(char *p, String *s, int dolower);
- static void init_hdefs(void);
- static int isattribute(char **pp, char *attr);
- static int latin1toutf(char *out, char *in, char *e);
- static String* mkboundary(void);
- static Part* part(Part *pp);
- static Part* passbody(Part *p, int dobound);
- static void passnotheader(void);
- static void passunixheader(void);
- static Part* problemchild(Part *p);
- static void readheader(Part *p);
- static Hline* readhl(void);
- static void readmtypes(void);
- static int save(Part *p, char *file);
- static void setfilename(Part *p, char *name);
- static char* skiptosemi(char *p);
- static char* skipwhite(char *p);
- static String* tokenconvert(String *t);
- static void writeheader(Part *p, int);
/*
 * small integer codes stored in Part.encoding and Part.disposition.
 * the two groups are numbered independently, so Enone and Dnone
 * are both 0.
 */
enum
{
	/* encodings */
	Enone=		0,
	Ebase64,
	Equoted,

	/* disposition possibilities */
	Dnone=		0,
	Dinline,
	Dfile,
	Dignore,

	PAD64=		'=',
};
- /*
- * a message part; either the whole message or a subpart
- */
- struct Part
- {
- Part *pp; /* parent part */
- Hline *hl; /* linked list of header lines */
- int disposition;
- int encoding;
- int badfile;
- int badtype;
- String *boundary; /* boundary for multiparts */
- int blen;
- String *charset; /* character set */
- String *type; /* content type */
- String *filename; /* file name */
- Biobuf *tmpbuf; /* diversion input buffer */
- };
- /*
- * a (multi)line header
- */
- struct Hline
- {
- Hline *next;
- String *s;
- };
- /*
- * header definitions for parsing
- */
- struct Hdef
- {
- char *type;
- void (*f)(Part*, Hdef*, char*);
- int len;
- };
- Hdef hdefs[] =
- {
- { "content-type:", ctype, },
- { "content-transfer-encoding:", cencoding, },
- { "content-disposition:", cdisposition, },
- { 0, },
- };
- /*
- * acceptable content types and their extensions
- */
- struct Mtype {
- Mtype *next;
- char *ext; /* extension */
- char *gtype; /* generic content type */
- char *stype; /* specific content type */
- char class;
- };
- Mtype *mtypes;
- int justreject;
- char *savefile;
/*
 * print the command synopsis on standard error and exit with
 * status "usage".
 */
void
usage(void)
{
	fprint(2, "usage: upas/vf [-r] [-s savefile]\n");
	exits("usage");
}
- void
- main(int argc, char **argv)
- {
- ARGBEGIN{
- case 'r':
- justreject = 1;
- break;
- case 's':
- savefile = EARGF(usage());
- break;
- default:
- usage();
- }ARGEND
- if(argc)
- usage();
- Binit(&in, 0, OREAD);
- Binit(&out, 1, OWRITE);
- init_hdefs();
- readmtypes();
- /* pass through our standard 'From ' line */
- passunixheader();
- /* parse with the top level part */
- part(nil);
- exits(0);
- }
- void
- refuse(char *reason)
- {
- static char msg[] =
- "mail refused: we don't accept executable attachments";
- postnote(PNGROUP, getpid(), smprint("%s: %s", msg, reason));
- exits(msg);
- }
- /*
- * parse a part; returns the ancestor whose boundary terminated
- * this part or nil on EOF.
- */
- static Part*
- part(Part *pp)
- {
- Part *p, *np;
- p = mallocz(sizeof *p, 1);
- p->pp = pp;
- readheader(p);
- if(p->boundary != nil){
- /* the format of a multipart part is always:
- * header
- * null or ignored body
- * boundary
- * header
- * body
- * boundary
- * ...
- */
- writeheader(p, 1);
- np = passbody(p, 1);
- if(np != p)
- return np;
- for(;;){
- np = part(p);
- if(np != p)
- return np;
- }
- } else {
- /* no boundary */
- /* may still be multipart if this is a forwarded message */
- if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){
- /* the format of forwarded message is:
- * header
- * header
- * body
- */
- writeheader(p, 1);
- passnotheader();
- return part(p);
- } else {
- /*
- * This is the meat. This may be an executable.
- * if so, wrap it and change its type
- */
- if(p->badtype || p->badfile){
- if(p->badfile == 2){
- if(savefile != nil)
- save(p, savefile);
- syslog(0, "vf", "vf rejected %s %s",
- p->type? s_to_c(p->type): "?",
- p->filename?s_to_c(p->filename):"?");
- fprint(2, "The mail contained an executable attachment.\n");
- fprint(2, "We refuse all mail containing such.\n");
- refuse(nil);
- }
- np = problemchild(p);
- if(np != p)
- return np;
- /* if problemchild returns p, it turns out p is okay: fall thru */
- }
- writeheader(p, 1);
- return passbody(p, 1);
- }
- }
- }
- /*
- * read and parse a complete header
- */
- static void
- readheader(Part *p)
- {
- Hline *hl, **l;
- Hdef *hd;
- l = &p->hl;
- for(;;){
- hl = readhl();
- if(hl == nil)
- break;
- *l = hl;
- l = &hl->next;
- for(hd = hdefs; hd->type != nil; hd++){
- if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){
- (*hd->f)(p, hd, s_to_c(hl->s));
- break;
- }
- }
- }
- }
- /*
- * read a possibly multiline header line
- */
- static Hline*
- readhl(void)
- {
- Hline *hl;
- String *s;
- char *p;
- int n;
- p = Brdline(&in, '\n');
- if(p == nil)
- return nil;
- n = Blinelen(&in);
- if(memchr(p, ':', n) == nil){
- Bseek(&in, -n, 1);
- return nil;
- }
- s = s_nappend(s_new(), p, n);
- for(;;){
- p = Brdline(&in, '\n');
- if(p == nil)
- break;
- n = Blinelen(&in);
- if(*p != ' ' && *p != '\t'){
- Bseek(&in, -n, 1);
- break;
- }
- s = s_nappend(s, p, n);
- }
- hl = malloc(sizeof *hl);
- hl->s = s;
- hl->next = nil;
- return hl;
- }
- /*
- * write out a complete header
- */
- static void
- writeheader(Part *p, int xfree)
- {
- Hline *hl, *next;
- for(hl = p->hl; hl != nil; hl = next){
- Bprint(&out, "%s", s_to_c(hl->s));
- if(xfree)
- s_free(hl->s);
- next = hl->next;
- if(xfree)
- free(hl);
- }
- if(xfree)
- p->hl = nil;
- }
- /*
- * pass a body through. return if we hit one of our ancestors'
- * boundaries or EOF. if we hit a boundary, return a pointer to
- * that ancestor. if we hit EOF, return nil.
- */
- static Part*
- passbody(Part *p, int dobound)
- {
- Part *pp;
- Biobuf *b;
- char *cp;
- for(;;){
- if(p->tmpbuf){
- b = p->tmpbuf;
- cp = Brdline(b, '\n');
- if(cp == nil){
- Bterm(b);
- p->tmpbuf = nil;
- goto Stdin;
- }
- }else{
- Stdin:
- b = ∈
- cp = Brdline(b, '\n');
- }
- if(cp == nil)
- return nil;
- for(pp = p; pp != nil; pp = pp->pp)
- if(pp->boundary != nil
- && strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){
- if(dobound)
- Bwrite(&out, cp, Blinelen(b));
- else
- Bseek(b, -Blinelen(b), 1);
- return pp;
- }
- Bwrite(&out, cp, Blinelen(b));
- }
- }
- /*
- * save the message somewhere
- */
- static vlong bodyoff; /* clumsy hack */
- static int
- save(Part *p, char *file)
- {
- int fd;
- char *cp;
- Bterm(&out);
- memset(&out, 0, sizeof(out));
- fd = open(file, OWRITE);
- if(fd < 0)
- return -1;
- seek(fd, 0, 2);
- Binit(&out, fd, OWRITE);
- cp = ctime(time(0));
- cp[28] = 0;
- Bprint(&out, "From virusfilter %s\n", cp);
- writeheader(p, 0);
- bodyoff = Boffset(&out);
- passbody(p, 1);
- Bprint(&out, "\n");
- Bterm(&out);
- close(fd);
- memset(&out, 0, sizeof out);
- Binit(&out, 1, OWRITE);
- return 0;
- }
- /*
- * write to a file but save the fd for passbody.
- */
- static char*
- savetmp(Part *p)
- {
- char *buf, *name;
- int fd;
- buf = smprint("%s/vf.XXXXXXXXXXX", UPASTMP);
- name = mktemp(buf);
- if((fd = create(name, OWRITE|OEXCL, 0666)) < 0){
- fprint(2, "%s: error creating temporary file: %r\n", argv0);
- refuse("can't create temporary file");
- }
- free(buf);
- close(fd);
- if(save(p, name) < 0){
- fprint(2, "%s: error saving temporary file: %r\n", argv0);
- refuse("can't write temporary file");
- }
- if(p->tmpbuf){
- fprint(2, "%s: error in savetmp: already have tmp file!\n",
- argv0);
- refuse("already have temporary file");
- }
- p->tmpbuf = Bopen(name, OREAD|ORCLOSE);
- if(p->tmpbuf == nil){
- fprint(2, "%s: error reading temporary file: %r\n", argv0);
- refuse("error reading temporary file");
- }
- Bseek(p->tmpbuf, bodyoff, 0);
- return strdup(name);
- }
- /*
- * Run the external checker to do content-based checks.
- */
- static int
- runchecker(Part *p)
- {
- int pid;
- char *name;
- Waitmsg *w;
- if(access("/mail/lib/validateattachment", AEXEC) < 0)
- return 0;
- name = savetmp(p);
- fprint(2, "run checker %s\n", name);
- switch(pid = fork()){
- case -1:
- sysfatal("fork: %r");
- case 0:
- dup(2, 1);
- execl("/mail/lib/validateattachment", "validateattachment", name, nil);
- _exits("exec failed");
- }
- /*
- * Okay to return on error - will let mail through but wrapped.
- */
- w = wait();
- if(w == nil){
- syslog(0, "mail", "vf wait failed: %r");
- return 0;
- }
- if(w->pid != pid){
- syslog(0, "mail", "vf wrong pid %d != %d", w->pid, pid);
- return 0;
- }
- if(p->filename)
- name = s_to_c(p->filename);
- if(strstr(w->msg, "discard")){
- syslog(0, "mail", "vf validateattachment rejected %s", name);
- refuse("rejected by validateattachment");
- }
- if(strstr(w->msg, "accept")){
- syslog(0, "mail", "vf validateattachment accepted %s", name);
- return 1;
- }
- free(w);
- return 0;
- }
- /*
- * emit a multipart Part that explains the problem
- */
- static Part*
- problemchild(Part *p)
- {
- Part *np;
- Hline *hl;
- String *boundary;
- char *cp;
- /*
- * We don't know whether the attachment is okay.
- * If there's an external checker, let it have a crack at it.
- */
- if(runchecker(p) > 0)
- return p;
- if(justreject)
- return p;
- fprint(2, "x\n");
- syslog(0, "mail", "vf wrapped %s %s", p->type?s_to_c(p->type):"?",
- p->filename?s_to_c(p->filename):"?");
- fprint(2, "x\n");
- boundary = mkboundary();
- fprint(2, "x\n");
- /* print out non-mime headers */
- for(hl = p->hl; hl != nil; hl = hl->next)
- if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0)
- Bprint(&out, "%s", s_to_c(hl->s));
- fprint(2, "x\n");
- /* add in our own multipart headers and message */
- Bprint(&out, "Content-Type: multipart/mixed;\n");
- Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary));
- Bprint(&out, "Content-Disposition: inline\n");
- Bprint(&out, "\n");
- Bprint(&out, "This is a multi-part message in MIME format.\n");
- Bprint(&out, "--%s\n", s_to_c(boundary));
- Bprint(&out, "Content-Disposition: inline\n");
- Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n");
- Bprint(&out, "Content-Transfer-Encoding: 7bit\n");
- Bprint(&out, "\n");
- Bprint(&out, "from postmaster@%s:\n", sysname());
- Bprint(&out, "The following attachment had content that we can't\n");
- Bprint(&out, "prove to be harmless. To avoid possible automatic\n");
- Bprint(&out, "execution, we changed the content headers.\n");
- Bprint(&out, "The original header was:\n\n");
- /* print out original header lines */
- for(hl = p->hl; hl != nil; hl = hl->next)
- if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0)
- Bprint(&out, "\t%s", s_to_c(hl->s));
- Bprint(&out, "--%s\n", s_to_c(boundary));
- /* change file name */
- if(p->filename)
- s_append(p->filename, ".suspect");
- else
- p->filename = s_copy("file.suspect");
- /* print out new header */
- Bprint(&out, "Content-Type: application/octet-stream\n");
- Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename));
- switch(p->encoding){
- case Enone:
- break;
- case Ebase64:
- Bprint(&out, "Content-Transfer-Encoding: base64\n");
- break;
- case Equoted:
- Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n");
- break;
- }
- fprint(2, "z\n");
- /* pass the body */
- np = passbody(p, 0);
- fprint(2, "w\n");
- /* add the new boundary and the original terminator */
- Bprint(&out, "--%s--\n", s_to_c(boundary));
- if(np && np->boundary){
- cp = Brdline(&in, '\n');
- Bwrite(&out, cp, Blinelen(&in));
- }
- fprint(2, "a %p\n", np);
- return np;
- }
/*
 * check whether *pp starts, ignoring case, with attr followed by
 * '=', with spaces allowed on either side of the '='.  on a match
 * *pp is advanced to the first character of the value and 1 is
 * returned; otherwise *pp is left alone and 0 is returned.
 */
static int
isattribute(char **pp, char *attr)
{
	char *p;
	int n;

	n = strlen(attr);
	p = *pp;
	if(cistrncmp(p, attr, n) != 0)
		return 0;
	p += n;
	while(*p == ' ')
		p++;
	if(*p++ != '=')
		return 0;
	while(*p == ' ')
		p++;
	*pp = p;
	return 1;
}
- /*
- * parse content type header
- */
- static void
- ctype(Part *p, Hdef *h, char *cp)
- {
- String *s;
- cp += h->len;
- cp = skipwhite(cp);
- p->type = s_new();
- cp = getstring(cp, p->type, 1);
- if(badtype(s_to_c(p->type)))
- p->badtype = 1;
- while(*cp){
- if(isattribute(&cp, "boundary")){
- s = s_new();
- cp = getstring(cp, s, 0);
- p->boundary = s_reset(p->boundary);
- s_append(p->boundary, "--");
- s_append(p->boundary, s_to_c(s));
- p->blen = s_len(p->boundary);
- s_free(s);
- } else if(cistrncmp(cp, "multipart", 9) == 0){
- /*
- * the first unbounded part of a multipart message,
- * the preamble, is not displayed or saved
- */
- } else if(isattribute(&cp, "name")){
- setfilename(p, cp);
- } else if(isattribute(&cp, "charset")){
- if(p->charset == nil)
- p->charset = s_new();
- cp = getstring(cp, s_reset(p->charset), 0);
- }
- cp = skiptosemi(cp);
- }
- }
- /*
- * parse content encoding header
- */
- static void
- cencoding(Part *m, Hdef *h, char *p)
- {
- p += h->len;
- p = skipwhite(p);
- if(cistrncmp(p, "base64", 6) == 0)
- m->encoding = Ebase64;
- else if(cistrncmp(p, "quoted-printable", 16) == 0)
- m->encoding = Equoted;
- }
- /*
- * parse content disposition header
- */
- static void
- cdisposition(Part *p, Hdef *h, char *cp)
- {
- cp += h->len;
- cp = skipwhite(cp);
- while(*cp){
- if(cistrncmp(cp, "inline", 6) == 0){
- p->disposition = Dinline;
- } else if(cistrncmp(cp, "attachment", 10) == 0){
- p->disposition = Dfile;
- } else if(cistrncmp(cp, "filename=", 9) == 0){
- cp += 9;
- setfilename(p, cp);
- }
- cp = skiptosemi(cp);
- }
- }
- static void
- setfilename(Part *p, char *name)
- {
- if(p->filename == nil)
- p->filename = s_new();
- getstring(name, s_reset(p->filename), 0);
- p->filename = tokenconvert(p->filename);
- p->badfile = badfile(s_to_c(p->filename));
- }
/*
 * return a pointer to the first character at or after p that
 * isspace() does not accept; that may be the terminating NUL.
 */
static char*
skipwhite(char *p)
{
	/* the cast keeps bytes above 0x7f out of negative territory */
	while(isspace((unsigned char)*p))
		p++;
	return p;
}
/*
 * advance p to the next ';' (or the end of the string), then past
 * that ';' and any further ';' or white space.  the result points
 * at the start of the next field or at the terminating NUL.
 */
static char*
skiptosemi(char *p)
{
	while(*p && *p != ';')
		p++;
	/* the cast keeps bytes above 0x7f out of negative territory */
	while(*p == ';' || isspace((unsigned char)*p))
		p++;
	return p;
}
- /*
- * parse a possibly "'d string from a header. A
- * ';' terminates the string.
- */
- static char*
- getstring(char *p, String *s, int dolower)
- {
- s = s_reset(s);
- p = skipwhite(p);
- if(*p == '"'){
- p++;
- for(;*p && *p != '"'; p++)
- if(dolower)
- s_putc(s, tolower(*p));
- else
- s_putc(s, *p);
- if(*p == '"')
- p++;
- s_terminate(s);
- return p;
- }
- for(; *p && !isspace(*p) && *p != ';'; p++)
- if(dolower)
- s_putc(s, tolower(*p));
- else
- s_putc(s, *p);
- s_terminate(s);
- return p;
- }
- static void
- init_hdefs(void)
- {
- Hdef *hd;
- static int already;
- if(already)
- return;
- already = 1;
- for(hd = hdefs; hd->type != nil; hd++)
- hd->len = strlen(hd->type);
- }
- /*
- * create a new boundary
- */
- static String*
- mkboundary(void)
- {
- char buf[32];
- int i;
- static int already;
- if(already == 0){
- srand((time(0)<<16)|getpid());
- already = 1;
- }
- strcpy(buf, "upas-");
- for(i = 5; i < sizeof(buf)-1; i++)
- buf[i] = 'a' + nrand(26);
- buf[i] = 0;
- return s_copy(buf);
- }
- /*
- * skip blank lines till header
- */
- static void
- passnotheader(void)
- {
- char *cp;
- int i, n;
- while((cp = Brdline(&in, '\n')) != nil){
- n = Blinelen(&in);
- for(i = 0; i < n-1; i++)
- if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){
- Bseek(&in, -n, 1);
- return;
- }
- Bwrite(&out, cp, n);
- }
- }
- /*
- * pass unix header lines
- */
- static void
- passunixheader(void)
- {
- char *p;
- int n;
- while((p = Brdline(&in, '\n')) != nil){
- n = Blinelen(&in);
- if(strncmp(p, "From ", 5) != 0){
- Bseek(&in, -n, 1);
- break;
- }
- Bwrite(&out, p, n);
- }
- }
- /*
- * Read mime types
- */
- static void
- readmtypes(void)
- {
- Biobuf *b;
- char *p;
- char *f[6];
- Mtype *m;
- Mtype **l;
- b = Bopen("/sys/lib/mimetype", OREAD);
- if(b == nil)
- return;
- l = &mtypes;
- while((p = Brdline(b, '\n')) != nil){
- if(*p == '#')
- continue;
- p[Blinelen(b)-1] = 0;
- if(tokenize(p, f, nelem(f)) < 5)
- continue;
- m = mallocz(sizeof *m, 1);
- if(m == nil)
- goto err;
- m->ext = strdup(f[0]);
- if(m->ext == 0)
- goto err;
- m->gtype = strdup(f[1]);
- if(m->gtype == 0)
- goto err;
- m->stype = strdup(f[2]);
- if(m->stype == 0)
- goto err;
- m->class = *f[4];
- *l = m;
- l = &(m->next);
- }
- Bterm(b);
- return;
- err:
- if(m == nil)
- return;
- free(m->ext);
- free(m->gtype);
- free(m->stype);
- free(m);
- Bterm(b);
- }
- /*
- * if the class is 'm' or 'y', accept it
- * if the class is 'p' check a previous extension
- * otherwise, filename is bad
- */
- static int
- badfile(char *name)
- {
- char *p;
- Mtype *m;
- int rv;
- p = strrchr(name, '.');
- if(p == nil)
- return 0;
- for(m = mtypes; m != nil; m = m->next)
- if(cistrcmp(p, m->ext) == 0){
- switch(m->class){
- case 'm':
- case 'y':
- return 0;
- case 'p':
- *p = 0;
- rv = badfile(name);
- *p = '.';
- return rv;
- case 'r':
- return 2;
- }
- }
- return 1;
- }
- /*
- * if the class is 'm' or 'y' or 'p', accept it
- * otherwise, filename is bad
- */
- static int
- badtype(char *type)
- {
- Mtype *m;
- char *s, *fix;
- int rv = 1;
- fix = s = strchr(type, '/');
- if(s != nil)
- *s++ = 0;
- else
- s = "-";
- for(m = mtypes; m != nil; m = m->next){
- if(cistrcmp(type, m->gtype) != 0)
- continue;
- if(cistrcmp(s, m->stype) != 0)
- continue;
- switch(m->class){
- case 'y':
- case 'p':
- case 'm':
- rv = 0;
- break;
- }
- break;
- }
- if(fix != nil)
- *fix = '/';
- return rv;
- }
/* rfc2047 non-ascii */
typedef struct Charset Charset;
struct Charset {
	char	*name;		/* character set name as it appears in an encoded word */
	int	len;		/* length of name */
	int	convert;	/* 1: run the text through latin1toutf(); 0: use as is */
} charsets[] =
{
	{ "us-ascii",		8,	1, },
	{ "utf-8",		5,	0, },
	{ "iso-8859-1",		10,	1, },
};
- /*
- * convert to UTF if need be
- */
- static String*
- tokenconvert(String *t)
- {
- String *s;
- char decoded[1024];
- char utfbuf[2*1024];
- int i, len;
- char *e;
- char *token;
- token = s_to_c(t);
- len = s_len(t);
- if(token[0] != '=' || token[1] != '?' ||
- token[len-2] != '?' || token[len-1] != '=')
- goto err;
- e = token+len-2;
- token += 2;
- /* bail if we don't understand the character set */
- for(i = 0; i < nelem(charsets); i++)
- if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0)
- if(token[charsets[i].len] == '?'){
- token += charsets[i].len + 1;
- break;
- }
- if(i >= nelem(charsets))
- goto err;
- /* bail if it doesn't fit */
- if(strlen(token) > sizeof(decoded)-1)
- goto err;
- /* bail if we don't understand the encoding */
- if(cistrncmp(token, "b?", 2) == 0){
- token += 2;
- len = dec64((uchar*)decoded, sizeof(decoded), token, e-token);
- decoded[len] = 0;
- } else if(cistrncmp(token, "q?", 2) == 0){
- token += 2;
- len = decquoted(decoded, token, e);
- if(len > 0 && decoded[len-1] == '\n')
- len--;
- decoded[len] = 0;
- } else
- goto err;
- s = nil;
- switch(charsets[i].convert){
- case 0:
- s = s_copy(decoded);
- break;
- case 1:
- s = s_new();
- latin1toutf(utfbuf, decoded, decoded+len);
- s_append(s, utfbuf);
- break;
- }
- return s;
- err:
- return s_clone(t);
- }
/*
 *  decode quoted
 *
 * classes stored in tableqp; 0 marks a character that is dropped.
 */
enum
{
	Self=	1,	/* copied through unchanged */
	Hex=	2,	/* introduces two hex digits */
};
- uchar tableqp[256];
- static void
- initquoted(void)
- {
- int c;
- memset(tableqp, 0, 256);
- for(c = ' '; c <= '<'; c++)
- tableqp[c] = Self;
- for(c = '>'; c <= '~'; c++)
- tableqp[c] = Self;
- tableqp['\t'] = Self;
- tableqp['='] = Hex;
- }
/*
 * value of the hexadecimal digit x, upper or lower case.
 * anything that is not a hex digit counts as 0.
 */
static int
hex2int(int x)
{
	if(x >= '0' && x <= '9')
		return x - '0';
	if(x >= 'A' && x <= 'F')
		return (x - 'A') + 10;
	if(x >= 'a' && x <= 'f')
		return (x - 'a') + 10;
	return 0;
}
- static char*
- decquotedline(char *out, char *in, char *e)
- {
- int c, soft;
- /* dump trailing white space */
- while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
- e--;
- /* trailing '=' means no newline */
- if(*e == '='){
- soft = 1;
- e--;
- } else
- soft = 0;
- while(in <= e){
- c = (*in++) & 0xff;
- switch(tableqp[c]){
- case Self:
- *out++ = c;
- break;
- case Hex:
- c = hex2int(*in++)<<4;
- c |= hex2int(*in++);
- *out++ = c;
- break;
- }
- }
- if(!soft)
- *out++ = '\n';
- *out = 0;
- return out;
- }
- static int
- decquoted(char *out, char *in, char *e)
- {
- char *p, *nl;
- if(tableqp[' '] == 0)
- initquoted();
- p = out;
- while((nl = strchr(in, '\n')) != nil && nl < e){
- p = decquotedline(p, in, nl);
- in = nl + 1;
- }
- if(in < e)
- p = decquotedline(p, in, e-1);
- /* make sure we end with a new line */
- if(*(p-1) != '\n'){
- *p++ = '\n';
- *p = 0;
- }
- return p - out;
- }
- /* translate latin1 directly since it fits neatly in utf */
- static int
- latin1toutf(char *out, char *in, char *e)
- {
- Rune r;
- char *p;
- p = out;
- for(; in < e; in++){
- r = (*in) & 0xff;
- p += runetochar(p, &r);
- }
- *p = 0;
- return p - out;
- }
|