vf.c 20 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117
  1. /*
  2. * this is a filter that changes mime types and names of
  3. * suspect executable attachments.
  4. */
  5. #include "common.h"
  6. #include <ctype.h>
  7. Biobuf in;
  8. Biobuf out;
  9. typedef struct Mtype Mtype;
  10. typedef struct Hdef Hdef;
  11. typedef struct Hline Hline;
  12. typedef struct Part Part;
  13. static int badfile(char *name);
  14. static int badtype(char *type);
  15. static void ctype(Part*, Hdef*, char*);
  16. static void cencoding(Part*, Hdef*, char*);
  17. static void cdisposition(Part*, Hdef*, char*);
  18. static int decquoted(char *out, char *in, char *e);
  19. static char* getstring(char *p, String *s, int dolower);
  20. static void init_hdefs(void);
  21. static int isattribute(char **pp, char *attr);
  22. static int latin1toutf(char *out, char *in, char *e);
  23. static String* mkboundary(void);
  24. static Part* part(Part *pp);
  25. static Part* passbody(Part *p, int dobound);
  26. static void passnotheader(void);
  27. static void passunixheader(void);
  28. static Part* problemchild(Part *p);
  29. static void readheader(Part *p);
  30. static Hline* readhl(void);
  31. static void readmtypes(void);
  32. static int save(Part *p, char *file);
  33. static void setfilename(Part *p, char *name);
  34. static char* skiptosemi(char *p);
  35. static char* skipwhite(char *p);
  36. static String* tokenconvert(String *t);
  37. static void writeheader(Part *p, int);
  38. enum
  39. {
  40. // encodings
  41. Enone= 0,
  42. Ebase64,
  43. Equoted,
  44. // disposition possibilities
  45. Dnone= 0,
  46. Dinline,
  47. Dfile,
  48. Dignore,
  49. PAD64= '=',
  50. };
  51. /*
  52. * a message part; either the whole message or a subpart
  53. */
  54. struct Part
  55. {
  56. Part *pp; /* parent part */
  57. Hline *hl; /* linked list of header lines */
  58. int disposition;
  59. int encoding;
  60. int badfile;
  61. int badtype;
  62. String *boundary; /* boundary for multiparts */
  63. int blen;
  64. String *charset; /* character set */
  65. String *type; /* content type */
  66. String *filename; /* file name */
  67. Biobuf *tmpbuf; /* diversion input buffer */
  68. };
  69. /*
  70. * a (multi)line header
  71. */
  72. struct Hline
  73. {
  74. Hline *next;
  75. String *s;
  76. };
  77. /*
  78. * header definitions for parsing
  79. */
  80. struct Hdef
  81. {
  82. char *type;
  83. void (*f)(Part*, Hdef*, char*);
  84. int len;
  85. };
  86. Hdef hdefs[] =
  87. {
  88. { "content-type:", ctype, },
  89. { "content-transfer-encoding:", cencoding, },
  90. { "content-disposition:", cdisposition, },
  91. { 0, },
  92. };
  93. /*
  94. * acceptable content types and their extensions
  95. */
  96. struct Mtype {
  97. Mtype *next;
  98. char *ext; /* extension */
  99. char *gtype; /* generic content type */
  100. char *stype; /* specific content type */
  101. char class;
  102. };
  103. Mtype *mtypes;
  104. int justreject;
  105. char *savefile;
  106. void
  107. usage(void)
  108. {
  109. fprint(2, "usage: upas/vf [-r] [-s savefile]\n");
  110. exits("usage");
  111. }
  112. void
  113. main(int argc, char **argv)
  114. {
  115. ARGBEGIN{
  116. case 'r':
  117. justreject = 1;
  118. break;
  119. case 's':
  120. savefile = EARGF(usage());
  121. break;
  122. default:
  123. usage();
  124. }ARGEND
  125. if(argc)
  126. usage();
  127. Binit(&in, 0, OREAD);
  128. Binit(&out, 1, OWRITE);
  129. init_hdefs();
  130. readmtypes();
  131. /* pass through our standard 'From ' line */
  132. passunixheader();
  133. /* parse with the top level part */
  134. part(nil);
  135. exits(0);
  136. }
  137. void
  138. refuse(void)
  139. {
  140. postnote(PNGROUP, getpid(), "mail refused: we don't accept executable attachments");
  141. exits("mail refused: we don't accept executable attachments");
  142. }
  143. /*
  144. * parse a part; returns the ancestor whose boundary terminated
  145. * this part or nil on EOF.
  146. */
  147. static Part*
  148. part(Part *pp)
  149. {
  150. Part *p, *np;
  151. p = mallocz(sizeof *p, 1);
  152. p->pp = pp;
  153. readheader(p);
  154. if(p->boundary != nil){
  155. /* the format of a multipart part is always:
  156. * header
  157. * null or ignored body
  158. * boundary
  159. * header
  160. * body
  161. * boundary
  162. * ...
  163. */
  164. writeheader(p, 1);
  165. np = passbody(p, 1);
  166. if(np != p)
  167. return np;
  168. for(;;){
  169. np = part(p);
  170. if(np != p)
  171. return np;
  172. }
  173. } else {
  174. /* no boundary */
  175. /* may still be multipart if this is a forwarded message */
  176. if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){
  177. /* the format of forwarded message is:
  178. * header
  179. * header
  180. * body
  181. */
  182. writeheader(p, 1);
  183. passnotheader();
  184. return part(p);
  185. } else {
  186. /*
  187. * This is the meat. This may be an executable.
  188. * if so, wrap it and change its type
  189. */
  190. if(p->badtype || p->badfile){
  191. if(p->badfile == 2){
  192. if(savefile != nil)
  193. save(p, savefile);
  194. syslog(0, "vf", "vf rejected %s %s", p->type?s_to_c(p->type):"?",
  195. p->filename?s_to_c(p->filename):"?");
  196. fprint(2, "The mail contained an executable attachment.\n");
  197. fprint(2, "We refuse all mail containing such.\n");
  198. refuse();
  199. }
  200. np = problemchild(p);
  201. if(np != p)
  202. return np;
  203. /* if problemchild returns p, it turns out p is okay: fall thru */
  204. }
  205. writeheader(p, 1);
  206. return passbody(p, 1);
  207. }
  208. }
  209. }
  210. /*
  211. * read and parse a complete header
  212. */
  213. static void
  214. readheader(Part *p)
  215. {
  216. Hline *hl, **l;
  217. Hdef *hd;
  218. l = &p->hl;
  219. for(;;){
  220. hl = readhl();
  221. if(hl == nil)
  222. break;
  223. *l = hl;
  224. l = &hl->next;
  225. for(hd = hdefs; hd->type != nil; hd++){
  226. if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){
  227. (*hd->f)(p, hd, s_to_c(hl->s));
  228. break;
  229. }
  230. }
  231. }
  232. }
  233. /*
  234. * read a possibly multiline header line
  235. */
  236. static Hline*
  237. readhl(void)
  238. {
  239. Hline *hl;
  240. String *s;
  241. char *p;
  242. int n;
  243. p = Brdline(&in, '\n');
  244. if(p == nil)
  245. return nil;
  246. n = Blinelen(&in);
  247. if(memchr(p, ':', n) == nil){
  248. Bseek(&in, -n, 1);
  249. return nil;
  250. }
  251. s = s_nappend(s_new(), p, n);
  252. for(;;){
  253. p = Brdline(&in, '\n');
  254. if(p == nil)
  255. break;
  256. n = Blinelen(&in);
  257. if(*p != ' ' && *p != '\t'){
  258. Bseek(&in, -n, 1);
  259. break;
  260. }
  261. s = s_nappend(s, p, n);
  262. }
  263. hl = malloc(sizeof *hl);
  264. hl->s = s;
  265. hl->next = nil;
  266. return hl;
  267. }
  268. /*
  269. * write out a complete header
  270. */
  271. static void
  272. writeheader(Part *p, int xfree)
  273. {
  274. Hline *hl, *next;
  275. for(hl = p->hl; hl != nil; hl = next){
  276. Bprint(&out, "%s", s_to_c(hl->s));
  277. if(xfree)
  278. s_free(hl->s);
  279. next = hl->next;
  280. if(xfree)
  281. free(hl);
  282. }
  283. if(xfree)
  284. p->hl = nil;
  285. }
  286. /*
  287. * pass a body through. return if we hit one of our ancestors'
  288. * boundaries or EOF. if we hit a boundary, return a pointer to
  289. * that ancestor. if we hit EOF, return nil.
  290. */
  291. static Part*
  292. passbody(Part *p, int dobound)
  293. {
  294. Part *pp;
  295. Biobuf *b;
  296. char *cp;
  297. for(;;){
  298. if(p->tmpbuf){
  299. b = p->tmpbuf;
  300. cp = Brdline(b, '\n');
  301. if(cp == nil){
  302. Bterm(b);
  303. p->tmpbuf = nil;
  304. goto Stdin;
  305. }
  306. }else{
  307. Stdin:
  308. b = &in;
  309. cp = Brdline(b, '\n');
  310. }
  311. if(cp == nil)
  312. return nil;
  313. for(pp = p; pp != nil; pp = pp->pp)
  314. if(pp->boundary != nil
  315. && strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){
  316. if(dobound)
  317. Bwrite(&out, cp, Blinelen(b));
  318. else
  319. Bseek(b, -Blinelen(b), 1);
  320. return pp;
  321. }
  322. Bwrite(&out, cp, Blinelen(b));
  323. }
  324. return nil;
  325. }
  326. /*
  327. * save the message somewhere
  328. */
  329. static vlong bodyoff; /* clumsy hack */
  330. static int
  331. save(Part *p, char *file)
  332. {
  333. int fd;
  334. char *cp;
  335. Bterm(&out);
  336. memset(&out, 0, sizeof(out));
  337. fd = open(file, OWRITE);
  338. if(fd < 0)
  339. return -1;
  340. seek(fd, 0, 2);
  341. Binit(&out, fd, OWRITE);
  342. cp = ctime(time(0));
  343. cp[28] = 0;
  344. Bprint(&out, "From virusfilter %s\n", cp);
  345. writeheader(p, 0);
  346. bodyoff = Boffset(&out);
  347. passbody(p, 1);
  348. Bprint(&out, "\n");
  349. Bterm(&out);
  350. close(fd);
  351. memset(&out, 0, sizeof out);
  352. Binit(&out, 1, OWRITE);
  353. return 0;
  354. }
  355. /*
  356. * write to a file but save the fd for passbody.
  357. */
  358. static char*
  359. savetmp(Part *p)
  360. {
  361. char buf[40], *name;
  362. int fd;
  363. strcpy(buf, "/tmp/vf.XXXXXXXXXXX");
  364. name = mktemp(buf);
  365. if((fd = create(name, OWRITE|OEXCL, 0666)) < 0){
  366. fprint(2, "error creating temporary file: %r\n");
  367. refuse();
  368. }
  369. close(fd);
  370. if(save(p, name) < 0){
  371. fprint(2, "error saving temporary file: %r\n");
  372. refuse();
  373. }
  374. if(p->tmpbuf){
  375. fprint(2, "error in savetmp: already have tmp file!\n");
  376. refuse();
  377. }
  378. p->tmpbuf = Bopen(name, OREAD|ORCLOSE);
  379. if(p->tmpbuf == nil){
  380. fprint(2, "error reading tempoary file: %r\n");
  381. refuse();
  382. }
  383. Bseek(p->tmpbuf, bodyoff, 0);
  384. return strdup(name);
  385. }
  386. /*
  387. * Run the external checker to do content-based checks.
  388. */
  389. static int
  390. runchecker(Part *p)
  391. {
  392. int pid;
  393. char *name;
  394. Waitmsg *w;
  395. if(access("/mail/lib/validateattachment", AEXEC) < 0)
  396. return 0;
  397. name = savetmp(p);
  398. fprint(2, "run checker %s\n", name);
  399. switch(pid = fork()){
  400. case -1:
  401. sysfatal("fork: %r");
  402. case 0:
  403. dup(2, 1);
  404. execl("/mail/lib/validateattachment", "validateattachment", name, nil);
  405. _exits("exec failed");
  406. }
  407. /*
  408. * Okay to return on error - will let mail through but wrapped.
  409. */
  410. w = wait();
  411. if(w == nil){
  412. syslog(0, "mail", "vf wait failed: %r");
  413. return 0;
  414. }
  415. if(w->pid != pid){
  416. syslog(0, "mail", "vf wrong pid %d != %d", w->pid, pid);
  417. return 0;
  418. }
  419. if(p->filename)
  420. name = s_to_c(p->filename);
  421. if(strstr(w->msg, "discard")){
  422. syslog(0, "mail", "vf validateattachment rejected %s", name);
  423. refuse();
  424. }
  425. if(strstr(w->msg, "accept")){
  426. syslog(0, "mail", "vf validateattachment accepted %s", name);
  427. return 1;
  428. }
  429. free(w);
  430. return 0;
  431. }
  432. /*
  433. * emit a multipart Part that explains the problem
  434. */
  435. static Part*
  436. problemchild(Part *p)
  437. {
  438. Part *np;
  439. Hline *hl;
  440. String *boundary;
  441. char *cp;
  442. /*
  443. * We don't know whether the attachment is okay.
  444. * If there's an external checker, let it have a crack at it.
  445. */
  446. if(runchecker(p) > 0)
  447. return p;
  448. if(justreject)
  449. return p;
  450. fprint(2, "x\n");
  451. syslog(0, "mail", "vf wrapped %s %s", p->type?s_to_c(p->type):"?",
  452. p->filename?s_to_c(p->filename):"?");
  453. fprint(2, "x\n");
  454. boundary = mkboundary();
  455. fprint(2, "x\n");
  456. /* print out non-mime headers */
  457. for(hl = p->hl; hl != nil; hl = hl->next)
  458. if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0)
  459. Bprint(&out, "%s", s_to_c(hl->s));
  460. fprint(2, "x\n");
  461. /* add in our own multipart headers and message */
  462. Bprint(&out, "Content-Type: multipart/mixed;\n");
  463. Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary));
  464. Bprint(&out, "Content-Disposition: inline\n");
  465. Bprint(&out, "\n");
  466. Bprint(&out, "This is a multi-part message in MIME format.\n");
  467. Bprint(&out, "--%s\n", s_to_c(boundary));
  468. Bprint(&out, "Content-Disposition: inline\n");
  469. Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n");
  470. Bprint(&out, "Content-Transfer-Encoding: 7bit\n");
  471. Bprint(&out, "\n");
  472. Bprint(&out, "from postmaster@%s:\n", sysname());
  473. Bprint(&out, "The following attachment had content that we can't\n");
  474. Bprint(&out, "prove to be harmless. To avoid possible automatic\n");
  475. Bprint(&out, "execution, we changed the content headers.\n");
  476. Bprint(&out, "The original header was:\n\n");
  477. /* print out original header lines */
  478. for(hl = p->hl; hl != nil; hl = hl->next)
  479. if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0)
  480. Bprint(&out, "\t%s", s_to_c(hl->s));
  481. Bprint(&out, "--%s\n", s_to_c(boundary));
  482. /* change file name */
  483. if(p->filename)
  484. s_append(p->filename, ".suspect");
  485. else
  486. p->filename = s_copy("file.suspect");
  487. /* print out new header */
  488. Bprint(&out, "Content-Type: application/octet-stream\n");
  489. Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename));
  490. switch(p->encoding){
  491. case Enone:
  492. break;
  493. case Ebase64:
  494. Bprint(&out, "Content-Transfer-Encoding: base64\n");
  495. break;
  496. case Equoted:
  497. Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n");
  498. break;
  499. }
  500. fprint(2, "z\n");
  501. /* pass the body */
  502. np = passbody(p, 0);
  503. fprint(2, "w\n");
  504. /* add the new boundary and the original terminator */
  505. Bprint(&out, "--%s--\n", s_to_c(boundary));
  506. if(np && np->boundary){
  507. cp = Brdline(&in, '\n');
  508. Bwrite(&out, cp, Blinelen(&in));
  509. }
  510. fprint(2, "a %p\n", np);
  511. return np;
  512. }
  513. static int
  514. isattribute(char **pp, char *attr)
  515. {
  516. char *p;
  517. int n;
  518. n = strlen(attr);
  519. p = *pp;
  520. if(cistrncmp(p, attr, n) != 0)
  521. return 0;
  522. p += n;
  523. while(*p == ' ')
  524. p++;
  525. if(*p++ != '=')
  526. return 0;
  527. while(*p == ' ')
  528. p++;
  529. *pp = p;
  530. return 1;
  531. }
  532. /*
  533. * parse content type header
  534. */
  535. static void
  536. ctype(Part *p, Hdef *h, char *cp)
  537. {
  538. String *s;
  539. cp += h->len;
  540. cp = skipwhite(cp);
  541. p->type = s_new();
  542. cp = getstring(cp, p->type, 1);
  543. if(badtype(s_to_c(p->type)))
  544. p->badtype = 1;
  545. while(*cp){
  546. if(isattribute(&cp, "boundary")){
  547. s = s_new();
  548. cp = getstring(cp, s, 0);
  549. p->boundary = s_reset(p->boundary);
  550. s_append(p->boundary, "--");
  551. s_append(p->boundary, s_to_c(s));
  552. p->blen = s_len(p->boundary);
  553. s_free(s);
  554. } else if(cistrncmp(cp, "multipart", 9) == 0){
  555. /*
  556. * the first unbounded part of a multipart message,
  557. * the preamble, is not displayed or saved
  558. */
  559. } else if(isattribute(&cp, "name")){
  560. setfilename(p, cp);
  561. } else if(isattribute(&cp, "charset")){
  562. if(p->charset == nil)
  563. p->charset = s_new();
  564. cp = getstring(cp, s_reset(p->charset), 0);
  565. }
  566. cp = skiptosemi(cp);
  567. }
  568. }
  569. /*
  570. * parse content encoding header
  571. */
  572. static void
  573. cencoding(Part *m, Hdef *h, char *p)
  574. {
  575. p += h->len;
  576. p = skipwhite(p);
  577. if(cistrncmp(p, "base64", 6) == 0)
  578. m->encoding = Ebase64;
  579. else if(cistrncmp(p, "quoted-printable", 16) == 0)
  580. m->encoding = Equoted;
  581. }
  582. /*
  583. * parse content disposition header
  584. */
  585. static void
  586. cdisposition(Part *p, Hdef *h, char *cp)
  587. {
  588. cp += h->len;
  589. cp = skipwhite(cp);
  590. while(*cp){
  591. if(cistrncmp(cp, "inline", 6) == 0){
  592. p->disposition = Dinline;
  593. } else if(cistrncmp(cp, "attachment", 10) == 0){
  594. p->disposition = Dfile;
  595. } else if(cistrncmp(cp, "filename=", 9) == 0){
  596. cp += 9;
  597. setfilename(p, cp);
  598. }
  599. cp = skiptosemi(cp);
  600. }
  601. }
  602. static void
  603. setfilename(Part *p, char *name)
  604. {
  605. if(p->filename == nil)
  606. p->filename = s_new();
  607. getstring(name, s_reset(p->filename), 0);
  608. p->filename = tokenconvert(p->filename);
  609. p->badfile = badfile(s_to_c(p->filename));
  610. }
  611. static char*
  612. skipwhite(char *p)
  613. {
  614. while(isspace(*p))
  615. p++;
  616. return p;
  617. }
  618. static char*
  619. skiptosemi(char *p)
  620. {
  621. while(*p && *p != ';')
  622. p++;
  623. while(*p == ';' || isspace(*p))
  624. p++;
  625. return p;
  626. }
  627. /*
  628. * parse a possibly "'d string from a header. A
  629. * ';' terminates the string.
  630. */
  631. static char*
  632. getstring(char *p, String *s, int dolower)
  633. {
  634. s = s_reset(s);
  635. p = skipwhite(p);
  636. if(*p == '"'){
  637. p++;
  638. for(;*p && *p != '"'; p++)
  639. if(dolower)
  640. s_putc(s, tolower(*p));
  641. else
  642. s_putc(s, *p);
  643. if(*p == '"')
  644. p++;
  645. s_terminate(s);
  646. return p;
  647. }
  648. for(; *p && !isspace(*p) && *p != ';'; p++)
  649. if(dolower)
  650. s_putc(s, tolower(*p));
  651. else
  652. s_putc(s, *p);
  653. s_terminate(s);
  654. return p;
  655. }
  656. static void
  657. init_hdefs(void)
  658. {
  659. Hdef *hd;
  660. static int already;
  661. if(already)
  662. return;
  663. already = 1;
  664. for(hd = hdefs; hd->type != nil; hd++)
  665. hd->len = strlen(hd->type);
  666. }
  667. /*
  668. * create a new boundary
  669. */
  670. static String*
  671. mkboundary(void)
  672. {
  673. char buf[32];
  674. int i;
  675. static int already;
  676. if(already == 0){
  677. srand((time(0)<<16)|getpid());
  678. already = 1;
  679. }
  680. strcpy(buf, "upas-");
  681. for(i = 5; i < sizeof(buf)-1; i++)
  682. buf[i] = 'a' + nrand(26);
  683. buf[i] = 0;
  684. return s_copy(buf);
  685. }
  686. /*
  687. * skip blank lines till header
  688. */
  689. static void
  690. passnotheader(void)
  691. {
  692. char *cp;
  693. int i, n;
  694. while((cp = Brdline(&in, '\n')) != nil){
  695. n = Blinelen(&in);
  696. for(i = 0; i < n-1; i++)
  697. if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){
  698. Bseek(&in, -n, 1);
  699. return;
  700. }
  701. Bwrite(&out, cp, n);
  702. }
  703. }
  704. /*
  705. * pass unix header lines
  706. */
  707. static void
  708. passunixheader(void)
  709. {
  710. char *p;
  711. int n;
  712. while((p = Brdline(&in, '\n')) != nil){
  713. n = Blinelen(&in);
  714. if(strncmp(p, "From ", 5) != 0){
  715. Bseek(&in, -n, 1);
  716. break;
  717. }
  718. Bwrite(&out, p, n);
  719. }
  720. }
  721. /*
  722. * Read mime types
  723. */
  724. static void
  725. readmtypes(void)
  726. {
  727. Biobuf *b;
  728. char *p;
  729. char *f[6];
  730. Mtype *m;
  731. Mtype **l;
  732. b = Bopen("/sys/lib/mimetype", OREAD);
  733. if(b == nil)
  734. return;
  735. l = &mtypes;
  736. while((p = Brdline(b, '\n')) != nil){
  737. if(*p == '#')
  738. continue;
  739. p[Blinelen(b)-1] = 0;
  740. if(tokenize(p, f, nelem(f)) < 5)
  741. continue;
  742. m = mallocz(sizeof *m, 1);
  743. if(m == nil)
  744. goto err;
  745. m->ext = strdup(f[0]);
  746. if(m->ext == 0)
  747. goto err;
  748. m->gtype = strdup(f[1]);
  749. if(m->gtype == 0)
  750. goto err;
  751. m->stype = strdup(f[2]);
  752. if(m->stype == 0)
  753. goto err;
  754. m->class = *f[4];
  755. *l = m;
  756. l = &(m->next);
  757. }
  758. Bterm(b);
  759. return;
  760. err:
  761. if(m == nil)
  762. return;
  763. free(m->ext);
  764. free(m->gtype);
  765. free(m->stype);
  766. free(m);
  767. Bterm(b);
  768. }
  769. /*
  770. * if the class is 'm' or 'y', accept it
  771. * if the class is 'p' check a previous extension
  772. * otherwise, filename is bad
  773. */
  774. static int
  775. badfile(char *name)
  776. {
  777. char *p;
  778. Mtype *m;
  779. int rv;
  780. p = strrchr(name, '.');
  781. if(p == nil)
  782. return 0;
  783. for(m = mtypes; m != nil; m = m->next)
  784. if(cistrcmp(p, m->ext) == 0){
  785. switch(m->class){
  786. case 'm':
  787. case 'y':
  788. return 0;
  789. case 'p':
  790. *p = 0;
  791. rv = badfile(name);
  792. *p = '.';
  793. return rv;
  794. case 'r':
  795. return 2;
  796. }
  797. }
  798. return 1;
  799. }
  800. /*
  801. * if the class is 'm' or 'y' or 'p', accept it
  802. * otherwise, filename is bad
  803. */
  804. static int
  805. badtype(char *type)
  806. {
  807. Mtype *m;
  808. char *s, *fix;
  809. int rv = 1;
  810. fix = s = strchr(type, '/');
  811. if(s != nil)
  812. *s++ = 0;
  813. else
  814. s = "-";
  815. for(m = mtypes; m != nil; m = m->next){
  816. if(cistrcmp(type, m->gtype) != 0)
  817. continue;
  818. if(cistrcmp(s, m->stype) != 0)
  819. continue;
  820. switch(m->class){
  821. case 'y':
  822. case 'p':
  823. case 'm':
  824. rv = 0;
  825. break;
  826. }
  827. break;
  828. }
  829. if(fix != nil)
  830. *fix = '/';
  831. return rv;
  832. }
  833. /* rfc2047 non-ascii */
  834. typedef struct Charset Charset;
  835. struct Charset {
  836. char *name;
  837. int len;
  838. int convert;
  839. } charsets[] =
  840. {
  841. { "us-ascii", 8, 1, },
  842. { "utf-8", 5, 0, },
  843. { "iso-8859-1", 10, 1, },
  844. };
  845. /*
  846. * convert to UTF if need be
  847. */
  848. static String*
  849. tokenconvert(String *t)
  850. {
  851. String *s;
  852. char decoded[1024];
  853. char utfbuf[2*1024];
  854. int i, len;
  855. char *e;
  856. char *token;
  857. token = s_to_c(t);
  858. len = s_len(t);
  859. if(token[0] != '=' || token[1] != '?' ||
  860. token[len-2] != '?' || token[len-1] != '=')
  861. goto err;
  862. e = token+len-2;
  863. token += 2;
  864. // bail if we don't understand the character set
  865. for(i = 0; i < nelem(charsets); i++)
  866. if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0)
  867. if(token[charsets[i].len] == '?'){
  868. token += charsets[i].len + 1;
  869. break;
  870. }
  871. if(i >= nelem(charsets))
  872. goto err;
  873. // bail if it doesn't fit
  874. if(strlen(token) > sizeof(decoded)-1)
  875. goto err;
  876. // bail if we don't understand the encoding
  877. if(cistrncmp(token, "b?", 2) == 0){
  878. token += 2;
  879. len = dec64((uchar*)decoded, sizeof(decoded), token, e-token);
  880. decoded[len] = 0;
  881. } else if(cistrncmp(token, "q?", 2) == 0){
  882. token += 2;
  883. len = decquoted(decoded, token, e);
  884. if(len > 0 && decoded[len-1] == '\n')
  885. len--;
  886. decoded[len] = 0;
  887. } else
  888. goto err;
  889. s = nil;
  890. switch(charsets[i].convert){
  891. case 0:
  892. s = s_copy(decoded);
  893. break;
  894. case 1:
  895. s = s_new();
  896. latin1toutf(utfbuf, decoded, decoded+len);
  897. s_append(s, utfbuf);
  898. break;
  899. }
  900. return s;
  901. err:
  902. return s_clone(t);
  903. }
  904. /*
  905. * decode quoted
  906. */
  907. enum
  908. {
  909. Self= 1,
  910. Hex= 2,
  911. };
  912. uchar tableqp[256];
  913. static void
  914. initquoted(void)
  915. {
  916. int c;
  917. memset(tableqp, 0, 256);
  918. for(c = ' '; c <= '<'; c++)
  919. tableqp[c] = Self;
  920. for(c = '>'; c <= '~'; c++)
  921. tableqp[c] = Self;
  922. tableqp['\t'] = Self;
  923. tableqp['='] = Hex;
  924. }
  925. static int
  926. hex2int(int x)
  927. {
  928. if(x >= '0' && x <= '9')
  929. return x - '0';
  930. if(x >= 'A' && x <= 'F')
  931. return (x - 'A') + 10;
  932. if(x >= 'a' && x <= 'f')
  933. return (x - 'a') + 10;
  934. return 0;
  935. }
  936. static char*
  937. decquotedline(char *out, char *in, char *e)
  938. {
  939. int c, soft;
  940. /* dump trailing white space */
  941. while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
  942. e--;
  943. /* trailing '=' means no newline */
  944. if(*e == '='){
  945. soft = 1;
  946. e--;
  947. } else
  948. soft = 0;
  949. while(in <= e){
  950. c = (*in++) & 0xff;
  951. switch(tableqp[c]){
  952. case Self:
  953. *out++ = c;
  954. break;
  955. case Hex:
  956. c = hex2int(*in++)<<4;
  957. c |= hex2int(*in++);
  958. *out++ = c;
  959. break;
  960. }
  961. }
  962. if(!soft)
  963. *out++ = '\n';
  964. *out = 0;
  965. return out;
  966. }
  967. static int
  968. decquoted(char *out, char *in, char *e)
  969. {
  970. char *p, *nl;
  971. if(tableqp[' '] == 0)
  972. initquoted();
  973. p = out;
  974. while((nl = strchr(in, '\n')) != nil && nl < e){
  975. p = decquotedline(p, in, nl);
  976. in = nl + 1;
  977. }
  978. if(in < e)
  979. p = decquotedline(p, in, e-1);
  980. // make sure we end with a new line
  981. if(*(p-1) != '\n'){
  982. *p++ = '\n';
  983. *p = 0;
  984. }
  985. return p - out;
  986. }
  987. /* translate latin1 directly since it fits neatly in utf */
  988. static int
  989. latin1toutf(char *out, char *in, char *e)
  990. {
  991. Rune r;
  992. char *p;
  993. p = out;
  994. for(; in < e; in++){
  995. r = (*in) & 0xff;
  996. p += runetochar(p, &r);
  997. }
  998. *p = 0;
  999. return p - out;
  1000. }