12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127 |
- /*
- * this is a filter that changes mime types and names of
- * suspect executable attachments.
- */
- #include "common.h"
- #include <ctype.h>
- Biobuf in;
- Biobuf out;
- typedef struct Mtype Mtype;
- typedef struct Hdef Hdef;
- typedef struct Hline Hline;
- typedef struct Part Part;
- static int badfile(char *name);
- static int badtype(char *type);
- static void ctype(Part*, Hdef*, char*);
- static void cencoding(Part*, Hdef*, char*);
- static void cdisposition(Part*, Hdef*, char*);
- static int decquoted(char *out, char *in, char *e);
- static char* getstring(char *p, String *s, int dolower);
- static void init_hdefs(void);
- static int isattribute(char **pp, char *attr);
- static int latin1toutf(char *out, char *in, char *e);
- static String* mkboundary(void);
- static Part* part(Part *pp);
- static Part* passbody(Part *p, int dobound);
- static void passnotheader(void);
- static void passunixheader(void);
- static Part* problemchild(Part *p);
- static void readheader(Part *p);
- static Hline* readhl(void);
- static void readmtypes(void);
- static int save(Part *p, char *file);
- static void setfilename(Part *p, char *name);
- static char* skiptosemi(char *p);
- static char* skipwhite(char *p);
- static String* tokenconvert(String *t);
- static void writeheader(Part *p, int);
/* content transfer encodings (Part.encoding) */
enum
{
	Enone = 0,
	Ebase64,
	Equoted,
};

/* content disposition possibilities (Part.disposition) */
enum
{
	Dnone = 0,
	Dinline,
	Dfile,
	Dignore,
};

enum
{
	PAD64 = '=',	/* presumably the base64 padding character; unused in this file */
};
- /*
- * a message part; either the whole message or a subpart
- */
- struct Part
- {
- Part *pp; /* parent part */
- Hline *hl; /* linked list of header lines */
- int disposition;
- int encoding;
- int badfile;
- int badtype;
- String *boundary; /* boundary for multiparts */
- int blen;
- String *charset; /* character set */
- String *type; /* content type */
- String *filename; /* file name */
- Biobuf *tmpbuf; /* diversion input buffer */
- };
- /*
- * a (multi)line header
- */
- struct Hline
- {
- Hline *next;
- String *s;
- };
- /*
- * header definitions for parsing
- */
- struct Hdef
- {
- char *type;
- void (*f)(Part*, Hdef*, char*);
- int len;
- };
- Hdef hdefs[] =
- {
- { "content-type:", ctype, },
- { "content-transfer-encoding:", cencoding, },
- { "content-disposition:", cdisposition, },
- { 0, },
- };
- /*
- * acceptable content types and their extensions
- */
- struct Mtype {
- Mtype *next;
- char *ext; /* extension */
- char *gtype; /* generic content type */
- char *stype; /* specific content type */
- char class;
- };
- Mtype *mtypes;
- int justreject;
- char *savefile;
/*
 *  complain about the command line on standard error and exit
 *  with status "usage"
 */
void
usage(void)
{
	fprint(2, "usage: upas/vf [-r] [-s savefile]\n");
	exits("usage");
}
- void
- main(int argc, char **argv)
- {
- ARGBEGIN{
- case 'r':
- justreject = 1;
- break;
- case 's':
- savefile = EARGF(usage());
- break;
- default:
- usage();
- }ARGEND
- if(argc)
- usage();
- Binit(&in, 0, OREAD);
- Binit(&out, 1, OWRITE);
- init_hdefs();
- readmtypes();
- /* pass through our standard 'From ' line */
- passunixheader();
- /* parse with the top level part */
- part(nil);
- exits(0);
- }
- void
- refuse(char *reason)
- {
- char *full;
- static char msg[] =
- "mail refused: we don't accept executable attachments";
- full = smprint("%s: %s", msg, reason);
- postnote(PNGROUP, getpid(), full);
- exits(full);
- }
- /*
- * parse a part; returns the ancestor whose boundary terminated
- * this part or nil on EOF.
- */
- static Part*
- part(Part *pp)
- {
- Part *p, *np;
- p = mallocz(sizeof *p, 1);
- p->pp = pp;
- readheader(p);
- if(p->boundary != nil){
- /* the format of a multipart part is always:
- * header
- * null or ignored body
- * boundary
- * header
- * body
- * boundary
- * ...
- */
- writeheader(p, 1);
- np = passbody(p, 1);
- if(np != p)
- return np;
- for(;;){
- np = part(p);
- if(np != p)
- return np;
- }
- } else {
- /* no boundary */
- /* may still be multipart if this is a forwarded message */
- if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){
- /* the format of forwarded message is:
- * header
- * header
- * body
- */
- writeheader(p, 1);
- passnotheader();
- return part(p);
- } else {
- /*
- * This is the meat. This may be an executable.
- * if so, wrap it and change its type
- */
- if(p->badtype || p->badfile){
- if(p->badfile == 2){
- if(savefile != nil)
- save(p, savefile);
- syslog(0, "vf", "vf rejected %s %s",
- p->type? s_to_c(p->type): "?",
- p->filename?s_to_c(p->filename):"?");
- fprint(2, "The mail contained an executable attachment.\n");
- fprint(2, "We refuse all mail containing such.\n");
- refuse(nil);
- }
- np = problemchild(p);
- if(np != p)
- return np;
- /* if problemchild returns p, it turns out p is okay: fall thru */
- }
- writeheader(p, 1);
- return passbody(p, 1);
- }
- }
- }
- /*
- * read and parse a complete header
- */
- static void
- readheader(Part *p)
- {
- Hline *hl, **l;
- Hdef *hd;
- l = &p->hl;
- for(;;){
- hl = readhl();
- if(hl == nil)
- break;
- *l = hl;
- l = &hl->next;
- for(hd = hdefs; hd->type != nil; hd++){
- if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){
- (*hd->f)(p, hd, s_to_c(hl->s));
- break;
- }
- }
- }
- }
- /*
- * read a possibly multiline header line
- */
- static Hline*
- readhl(void)
- {
- Hline *hl;
- String *s;
- char *p;
- int n;
- p = Brdline(&in, '\n');
- if(p == nil)
- return nil;
- n = Blinelen(&in);
- if(memchr(p, ':', n) == nil){
- Bseek(&in, -n, 1);
- return nil;
- }
- s = s_nappend(s_new(), p, n);
- for(;;){
- p = Brdline(&in, '\n');
- if(p == nil)
- break;
- n = Blinelen(&in);
- if(*p != ' ' && *p != '\t'){
- Bseek(&in, -n, 1);
- break;
- }
- s = s_nappend(s, p, n);
- }
- hl = malloc(sizeof *hl);
- hl->s = s;
- hl->next = nil;
- return hl;
- }
- /*
- * write out a complete header
- */
- static void
- writeheader(Part *p, int xfree)
- {
- Hline *hl, *next;
- for(hl = p->hl; hl != nil; hl = next){
- Bprint(&out, "%s", s_to_c(hl->s));
- if(xfree)
- s_free(hl->s);
- next = hl->next;
- if(xfree)
- free(hl);
- }
- if(xfree)
- p->hl = nil;
- }
- /*
- * pass a body through. return if we hit one of our ancestors'
- * boundaries or EOF. if we hit a boundary, return a pointer to
- * that ancestor. if we hit EOF, return nil.
- */
- static Part*
- passbody(Part *p, int dobound)
- {
- Part *pp;
- Biobuf *b;
- char *cp;
- for(;;){
- if(p->tmpbuf){
- b = p->tmpbuf;
- cp = Brdline(b, '\n');
- if(cp == nil){
- Bterm(b);
- p->tmpbuf = nil;
- goto Stdin;
- }
- }else{
- Stdin:
- b = ∈
- cp = Brdline(b, '\n');
- }
- if(cp == nil)
- return nil;
- for(pp = p; pp != nil; pp = pp->pp)
- if(pp->boundary != nil
- && strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){
- if(dobound)
- Bwrite(&out, cp, Blinelen(b));
- else
- Bseek(b, -Blinelen(b), 1);
- return pp;
- }
- Bwrite(&out, cp, Blinelen(b));
- }
- }
- /*
- * save the message somewhere
- */
- static vlong bodyoff; /* clumsy hack */
- static int
- save(Part *p, char *file)
- {
- int fd;
- char *cp;
- Bterm(&out);
- memset(&out, 0, sizeof(out));
- fd = open(file, OWRITE);
- if(fd < 0)
- return -1;
- seek(fd, 0, 2);
- Binit(&out, fd, OWRITE);
- cp = ctime(time(0));
- cp[28] = 0;
- Bprint(&out, "From virusfilter %s\n", cp);
- writeheader(p, 0);
- bodyoff = Boffset(&out);
- passbody(p, 1);
- Bprint(&out, "\n");
- Bterm(&out);
- close(fd);
- memset(&out, 0, sizeof out);
- Binit(&out, 1, OWRITE);
- return 0;
- }
- /*
- * write to a file but save the fd for passbody.
- */
- static char*
- savetmp(Part *p)
- {
- char *name;
- int fd;
- name = mktemp(smprint("%s/vf.XXXXXXXXXXX", UPASTMP));
- if((fd = create(name, OWRITE|OEXCL, 0666)) < 0){
- fprint(2, "%s: error creating temporary file: %r\n", argv0);
- refuse("can't create temporary file");
- }
- close(fd);
- if(save(p, name) < 0){
- fprint(2, "%s: error saving temporary file: %r\n", argv0);
- refuse("can't write temporary file");
- }
- if(p->tmpbuf){
- fprint(2, "%s: error in savetmp: already have tmp file!\n",
- argv0);
- refuse("already have temporary file");
- }
- p->tmpbuf = Bopen(name, OREAD|ORCLOSE);
- if(p->tmpbuf == nil){
- fprint(2, "%s: error reading temporary file: %r\n", argv0);
- refuse("error reading temporary file");
- }
- Bseek(p->tmpbuf, bodyoff, 0);
- return name;
- }
- /*
- * Run the external checker to do content-based checks.
- */
- static int
- runchecker(Part *p)
- {
- int pid;
- char *name;
- Waitmsg *w;
- if(access("/mail/lib/validateattachment", AEXEC) < 0)
- return 0;
- name = savetmp(p);
- fprint(2, "run checker %s\n", name);
- switch(pid = fork()){
- case -1:
- sysfatal("fork: %r");
- case 0:
- dup(2, 1);
- execl("/mail/lib/validateattachment", "validateattachment",
- name, nil);
- _exits("exec failed");
- }
- /*
- * Okay to return on error - will let mail through but wrapped.
- */
- w = wait();
- if(w == nil){
- syslog(0, "mail", "vf wait failed: %r");
- return 0;
- }
- if(w->pid != pid){
- syslog(0, "mail", "vf wrong pid %d != %d", w->pid, pid);
- return 0;
- }
- if(p->filename) {
- free(name);
- name = strdup(s_to_c(p->filename));
- }
- if(strstr(w->msg, "discard")){
- syslog(0, "mail", "vf validateattachment rejected %s", name);
- refuse("rejected by validateattachment");
- }
- if(strstr(w->msg, "accept")){
- syslog(0, "mail", "vf validateattachment accepted %s", name);
- return 1;
- }
- free(w);
- free(name);
- return 0;
- }
- /*
- * emit a multipart Part that explains the problem
- */
- static Part*
- problemchild(Part *p)
- {
- Part *np;
- Hline *hl;
- String *boundary;
- char *cp;
- /*
- * We don't know whether the attachment is okay.
- * If there's an external checker, let it have a crack at it.
- */
- if(runchecker(p) > 0)
- return p;
- if(justreject)
- return p;
- fprint(2, "x\n");
- syslog(0, "mail", "vf wrapped %s %s", p->type?s_to_c(p->type):"?",
- p->filename?s_to_c(p->filename):"?");
- fprint(2, "x\n");
- boundary = mkboundary();
- fprint(2, "x\n");
- /* print out non-mime headers */
- for(hl = p->hl; hl != nil; hl = hl->next)
- if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0)
- Bprint(&out, "%s", s_to_c(hl->s));
- fprint(2, "x\n");
- /* add in our own multipart headers and message */
- Bprint(&out, "Content-Type: multipart/mixed;\n");
- Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary));
- Bprint(&out, "Content-Disposition: inline\n");
- Bprint(&out, "\n");
- Bprint(&out, "This is a multi-part message in MIME format.\n");
- Bprint(&out, "--%s\n", s_to_c(boundary));
- Bprint(&out, "Content-Disposition: inline\n");
- Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n");
- Bprint(&out, "Content-Transfer-Encoding: 7bit\n");
- Bprint(&out, "\n");
- Bprint(&out, "from postmaster@%s:\n", sysname());
- Bprint(&out, "The following attachment had content that we can't\n");
- Bprint(&out, "prove to be harmless. To avoid possible automatic\n");
- Bprint(&out, "execution, we changed the content headers.\n");
- Bprint(&out, "The original header was:\n\n");
- /* print out original header lines */
- for(hl = p->hl; hl != nil; hl = hl->next)
- if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0)
- Bprint(&out, "\t%s", s_to_c(hl->s));
- Bprint(&out, "--%s\n", s_to_c(boundary));
- /* change file name */
- if(p->filename)
- s_append(p->filename, ".suspect");
- else
- p->filename = s_copy("file.suspect");
- /* print out new header */
- Bprint(&out, "Content-Type: application/octet-stream\n");
- Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename));
- switch(p->encoding){
- case Enone:
- break;
- case Ebase64:
- Bprint(&out, "Content-Transfer-Encoding: base64\n");
- break;
- case Equoted:
- Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n");
- break;
- }
- fprint(2, "z\n");
- /* pass the body */
- np = passbody(p, 0);
- fprint(2, "w\n");
- /* add the new boundary and the original terminator */
- Bprint(&out, "--%s--\n", s_to_c(boundary));
- if(np && np->boundary){
- cp = Brdline(&in, '\n');
- Bwrite(&out, cp, Blinelen(&in));
- }
- fprint(2, "a %p\n", np);
- return np;
- }
/*
 *  if *pp starts with attr (case ignored) followed by '=', with
 *  optional spaces on either side of the '=', move *pp to the
 *  value and return 1.  otherwise return 0 and leave *pp alone.
 */
static int
isattribute(char **pp, char *attr)
{
	char *cur;
	int alen;

	cur = *pp;
	alen = strlen(attr);
	if(cistrncmp(cur, attr, alen) != 0)
		return 0;

	for(cur += alen; *cur == ' '; cur++)
		;
	if(*cur != '=')
		return 0;
	for(cur++; *cur == ' '; cur++)
		;

	*pp = cur;
	return 1;
}
- /*
- * parse content type header
- */
- static void
- ctype(Part *p, Hdef *h, char *cp)
- {
- String *s;
- cp += h->len;
- cp = skipwhite(cp);
- p->type = s_new();
- cp = getstring(cp, p->type, 1);
- if(badtype(s_to_c(p->type)))
- p->badtype = 1;
- while(*cp){
- if(isattribute(&cp, "boundary")){
- s = s_new();
- cp = getstring(cp, s, 0);
- p->boundary = s_reset(p->boundary);
- s_append(p->boundary, "--");
- s_append(p->boundary, s_to_c(s));
- p->blen = s_len(p->boundary);
- s_free(s);
- } else if(cistrncmp(cp, "multipart", 9) == 0){
- /*
- * the first unbounded part of a multipart message,
- * the preamble, is not displayed or saved
- */
- } else if(isattribute(&cp, "name")){
- setfilename(p, cp);
- } else if(isattribute(&cp, "charset")){
- if(p->charset == nil)
- p->charset = s_new();
- cp = getstring(cp, s_reset(p->charset), 0);
- }
- cp = skiptosemi(cp);
- }
- }
- /*
- * parse content encoding header
- */
- static void
- cencoding(Part *m, Hdef *h, char *p)
- {
- p += h->len;
- p = skipwhite(p);
- if(cistrncmp(p, "base64", 6) == 0)
- m->encoding = Ebase64;
- else if(cistrncmp(p, "quoted-printable", 16) == 0)
- m->encoding = Equoted;
- }
- /*
- * parse content disposition header
- */
- static void
- cdisposition(Part *p, Hdef *h, char *cp)
- {
- cp += h->len;
- cp = skipwhite(cp);
- while(*cp){
- if(cistrncmp(cp, "inline", 6) == 0){
- p->disposition = Dinline;
- } else if(cistrncmp(cp, "attachment", 10) == 0){
- p->disposition = Dfile;
- } else if(cistrncmp(cp, "filename=", 9) == 0){
- cp += 9;
- setfilename(p, cp);
- }
- cp = skiptosemi(cp);
- }
- }
- static void
- setfilename(Part *p, char *name)
- {
- if(p->filename == nil)
- p->filename = s_new();
- getstring(name, s_reset(p->filename), 0);
- p->filename = tokenconvert(p->filename);
- p->badfile = badfile(s_to_c(p->filename));
- }
/*
 *  return a pointer to the first character of p that isn't white
 *  space.  the character is converted to unsigned char first so that
 *  bytes with the high bit set are never passed to isspace() as
 *  negative values.
 */
static char*
skipwhite(char *p)
{
	while(isspace((unsigned char)*p))
		p++;
	return p;
}
/*
 *  move on to the next attribute: skip up to the next ';', then
 *  over any run of ';' and white space.  returns a pointer to the
 *  terminating NUL if there is nothing more.  characters are
 *  converted to unsigned char before being passed to isspace().
 */
static char*
skiptosemi(char *p)
{
	while(*p != 0 && *p != ';')
		p++;
	while(*p == ';' || isspace((unsigned char)*p))
		p++;
	return p;
}
- /*
- * parse a possibly "'d string from a header. A
- * ';' terminates the string.
- */
- static char*
- getstring(char *p, String *s, int dolower)
- {
- s = s_reset(s);
- p = skipwhite(p);
- if(*p == '"'){
- p++;
- for(;*p && *p != '"'; p++)
- if(dolower)
- s_putc(s, tolower(*p));
- else
- s_putc(s, *p);
- if(*p == '"')
- p++;
- s_terminate(s);
- return p;
- }
- for(; *p && !isspace(*p) && *p != ';'; p++)
- if(dolower)
- s_putc(s, tolower(*p));
- else
- s_putc(s, *p);
- s_terminate(s);
- return p;
- }
- static void
- init_hdefs(void)
- {
- Hdef *hd;
- static int already;
- if(already)
- return;
- already = 1;
- for(hd = hdefs; hd->type != nil; hd++)
- hd->len = strlen(hd->type);
- }
- /*
- * create a new boundary
- */
- static String*
- mkboundary(void)
- {
- char buf[32];
- int i;
- static int already;
- if(already == 0){
- srand((time(0)<<16)|getpid());
- already = 1;
- }
- strcpy(buf, "upas-");
- for(i = 5; i < sizeof(buf)-1; i++)
- buf[i] = 'a' + nrand(26);
- buf[i] = 0;
- return s_copy(buf);
- }
- /*
- * skip blank lines till header
- */
- static void
- passnotheader(void)
- {
- char *cp;
- int i, n;
- while((cp = Brdline(&in, '\n')) != nil){
- n = Blinelen(&in);
- for(i = 0; i < n-1; i++)
- if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){
- Bseek(&in, -n, 1);
- return;
- }
- Bwrite(&out, cp, n);
- }
- }
- /*
- * pass unix header lines
- */
- static void
- passunixheader(void)
- {
- char *p;
- int n;
- while((p = Brdline(&in, '\n')) != nil){
- n = Blinelen(&in);
- if(strncmp(p, "From ", 5) != 0){
- Bseek(&in, -n, 1);
- break;
- }
- Bwrite(&out, p, n);
- }
- }
- /*
- * Read mime types
- */
- static void
- readmtypes(void)
- {
- Biobuf *b;
- char *p;
- char *f[6];
- Mtype *m;
- Mtype **l;
- b = Bopen("/sys/lib/mimetype", OREAD);
- if(b == nil)
- return;
- l = &mtypes;
- while((p = Brdline(b, '\n')) != nil){
- if(*p == '#')
- continue;
- p[Blinelen(b)-1] = 0;
- if(tokenize(p, f, nelem(f)) < 5)
- continue;
- m = mallocz(sizeof *m, 1);
- if(m == nil)
- goto err;
- m->ext = strdup(f[0]);
- if(m->ext == 0)
- goto err;
- m->gtype = strdup(f[1]);
- if(m->gtype == 0)
- goto err;
- m->stype = strdup(f[2]);
- if(m->stype == 0)
- goto err;
- m->class = *f[4];
- *l = m;
- l = &(m->next);
- }
- Bterm(b);
- return;
- err:
- if(m == nil)
- return;
- free(m->ext);
- free(m->gtype);
- free(m->stype);
- free(m);
- Bterm(b);
- }
- /*
- * if the class is 'm' or 'y', accept it
- * if the class is 'p' check a previous extension
- * otherwise, filename is bad
- */
- static int
- badfile(char *name)
- {
- char *p;
- Mtype *m;
- int rv;
- p = strrchr(name, '.');
- if(p == nil)
- return 0;
- for(m = mtypes; m != nil; m = m->next)
- if(cistrcmp(p, m->ext) == 0){
- switch(m->class){
- case 'm':
- case 'y':
- return 0;
- case 'p':
- *p = 0;
- rv = badfile(name);
- *p = '.';
- return rv;
- case 'r':
- return 2;
- }
- }
- return 1;
- }
- /*
- * if the class is 'm' or 'y' or 'p', accept it
- * otherwise, filename is bad
- */
- static int
- badtype(char *type)
- {
- Mtype *m;
- char *s, *fix;
- int rv = 1;
- fix = s = strchr(type, '/');
- if(s != nil)
- *s++ = 0;
- else
- s = "-";
- for(m = mtypes; m != nil; m = m->next){
- if(cistrcmp(type, m->gtype) != 0)
- continue;
- if(cistrcmp(s, m->stype) != 0)
- continue;
- switch(m->class){
- case 'y':
- case 'p':
- case 'm':
- rv = 0;
- break;
- }
- break;
- }
- if(fix != nil)
- *fix = '/';
- return rv;
- }
/*
 *  rfc2047 non-ascii: the character sets tokenconvert() understands
 */
typedef struct Charset Charset;
struct Charset {
	char	*name;		/* as it appears in the token */
	int	len;		/* strlen(name) */
	int	convert;	/* 1: run the text through latin1toutf(); 0: copy as is */
};

Charset charsets[] =
{
	{ "us-ascii",	8,	1, },
	{ "utf-8",	5,	0, },
	{ "iso-8859-1",	10,	1, },
};
- /*
- * convert to UTF if need be
- */
- static String*
- tokenconvert(String *t)
- {
- String *s;
- char decoded[1024];
- char utfbuf[2*1024];
- int i, len;
- char *e;
- char *token;
- token = s_to_c(t);
- len = s_len(t);
- if(token[0] != '=' || token[1] != '?' ||
- token[len-2] != '?' || token[len-1] != '=')
- goto err;
- e = token+len-2;
- token += 2;
- /* bail if we don't understand the character set */
- for(i = 0; i < nelem(charsets); i++)
- if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0)
- if(token[charsets[i].len] == '?'){
- token += charsets[i].len + 1;
- break;
- }
- if(i >= nelem(charsets))
- goto err;
- /* bail if it doesn't fit */
- if(strlen(token) > sizeof(decoded)-1)
- goto err;
- /* bail if we don't understand the encoding */
- if(cistrncmp(token, "b?", 2) == 0){
- token += 2;
- len = dec64((uchar*)decoded, sizeof(decoded), token, e-token);
- decoded[len] = 0;
- } else if(cistrncmp(token, "q?", 2) == 0){
- token += 2;
- len = decquoted(decoded, token, e);
- if(len > 0 && decoded[len-1] == '\n')
- len--;
- decoded[len] = 0;
- } else
- goto err;
- s = nil;
- switch(charsets[i].convert){
- case 0:
- s = s_copy(decoded);
- break;
- case 1:
- s = s_new();
- latin1toutf(utfbuf, decoded, decoded+len);
- s_append(s, utfbuf);
- break;
- }
- return s;
- err:
- return s_clone(t);
- }
- /*
- * decode quoted
- */
- enum
- {
- Self= 1,
- Hex= 2,
- };
- uchar tableqp[256];
- static void
- initquoted(void)
- {
- int c;
- memset(tableqp, 0, 256);
- for(c = ' '; c <= '<'; c++)
- tableqp[c] = Self;
- for(c = '>'; c <= '~'; c++)
- tableqp[c] = Self;
- tableqp['\t'] = Self;
- tableqp['='] = Hex;
- }
/*
 *  value of the hex digit x, in either case;
 *  0 if x isn't a hex digit
 */
static int
hex2int(int x)
{
	if('0' <= x && x <= '9')
		return x - '0';
	if('a' <= x && x <= 'f')
		return 10 + (x - 'a');
	if('A' <= x && x <= 'F')
		return 10 + (x - 'A');
	return 0;
}
- static char*
- decquotedline(char *out, char *in, char *e)
- {
- int c, soft;
- /* dump trailing white space */
- while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
- e--;
- /* trailing '=' means no newline */
- if(*e == '='){
- soft = 1;
- e--;
- } else
- soft = 0;
- while(in <= e){
- c = (*in++) & 0xff;
- switch(tableqp[c]){
- case Self:
- *out++ = c;
- break;
- case Hex:
- c = hex2int(*in++)<<4;
- c |= hex2int(*in++);
- *out++ = c;
- break;
- }
- }
- if(!soft)
- *out++ = '\n';
- *out = 0;
- return out;
- }
- static int
- decquoted(char *out, char *in, char *e)
- {
- char *p, *nl;
- if(tableqp[' '] == 0)
- initquoted();
- p = out;
- while((nl = strchr(in, '\n')) != nil && nl < e){
- p = decquotedline(p, in, nl);
- in = nl + 1;
- }
- if(in < e)
- p = decquotedline(p, in, e-1);
- /* make sure we end with a new line */
- if(*(p-1) != '\n'){
- *p++ = '\n';
- *p = 0;
- }
- return p - out;
- }
- /* translate latin1 directly since it fits neatly in utf */
- static int
- latin1toutf(char *out, char *in, char *e)
- {
- Rune r;
- char *p;
- p = out;
- for(; in < e; in++){
- r = (*in) & 0xff;
- p += runetochar(p, &r);
- }
- *p = 0;
- return p - out;
- }
|