vf.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122
  1. /*
  2. * this is a filter that changes mime types and names of
  3. * suspect executable attachments.
  4. */
  5. #include "common.h"
  6. #include <ctype.h>
  7. Biobuf in;
  8. Biobuf out;
  9. typedef struct Mtype Mtype;
  10. typedef struct Hdef Hdef;
  11. typedef struct Hline Hline;
  12. typedef struct Part Part;
  13. static int badfile(char *name);
  14. static int badtype(char *type);
  15. static void ctype(Part*, Hdef*, char*);
  16. static void cencoding(Part*, Hdef*, char*);
  17. static void cdisposition(Part*, Hdef*, char*);
  18. static int decquoted(char *out, char *in, char *e);
  19. static char* getstring(char *p, String *s, int dolower);
  20. static void init_hdefs(void);
  21. static int isattribute(char **pp, char *attr);
  22. static int latin1toutf(char *out, char *in, char *e);
  23. static String* mkboundary(void);
  24. static Part* part(Part *pp);
  25. static Part* passbody(Part *p, int dobound);
  26. static void passnotheader(void);
  27. static void passunixheader(void);
  28. static Part* problemchild(Part *p);
  29. static void readheader(Part *p);
  30. static Hline* readhl(void);
  31. static void readmtypes(void);
  32. static int save(Part *p, char *file);
  33. static void setfilename(Part *p, char *name);
  34. static char* skiptosemi(char *p);
  35. static char* skipwhite(char *p);
  36. static String* tokenconvert(String *t);
  37. static void writeheader(Part *p, int);
  /* content transfer encodings recorded in Part.encoding */
  enum
  {
      Enone = 0,
      Ebase64,
      Equoted,
  };

  /* disposition possibilities recorded in Part.disposition */
  enum
  {
      Dnone = 0,
      Dinline,
      Dfile,
      Dignore,
  };

  /* NOTE(review): presumably the base64 pad character; unused in this part of the file */
  enum
  {
      PAD64 = '=',
  };
  51. /*
  52. * a message part; either the whole message or a subpart
  53. */
  54. struct Part
  55. {
  56. Part *pp; /* parent part */
  57. Hline *hl; /* linked list of header lines */
  58. int disposition;
  59. int encoding;
  60. int badfile;
  61. int badtype;
  62. String *boundary; /* boundary for multiparts */
  63. int blen;
  64. String *charset; /* character set */
  65. String *type; /* content type */
  66. String *filename; /* file name */
  67. Biobuf *tmpbuf; /* diversion input buffer */
  68. };
  69. /*
  70. * a (multi)line header
  71. */
  72. struct Hline
  73. {
  74. Hline *next;
  75. String *s;
  76. };
  77. /*
  78. * header definitions for parsing
  79. */
  80. struct Hdef
  81. {
  82. char *type;
  83. void (*f)(Part*, Hdef*, char*);
  84. int len;
  85. };
  86. Hdef hdefs[] =
  87. {
  88. { "content-type:", ctype, },
  89. { "content-transfer-encoding:", cencoding, },
  90. { "content-disposition:", cdisposition, },
  91. { 0, },
  92. };
  93. /*
  94. * acceptable content types and their extensions
  95. */
  96. struct Mtype {
  97. Mtype *next;
  98. char *ext; /* extension */
  99. char *gtype; /* generic content type */
  100. char *stype; /* specific content type */
  101. char class;
  102. };
  103. Mtype *mtypes;
  104. int justreject;
  105. char *savefile;
  /*
   * print the command synopsis on standard error and exit
   */
  void
  usage(void)
  {
      static char synopsis[] = "usage: upas/vf [-r] [-s savefile]\n";

      fprint(2, "%s", synopsis);
      exits("usage");
  }
  112. void
  113. main(int argc, char **argv)
  114. {
  115. ARGBEGIN{
  116. case 'r':
  117. justreject = 1;
  118. break;
  119. case 's':
  120. savefile = EARGF(usage());
  121. break;
  122. default:
  123. usage();
  124. }ARGEND
  125. if(argc)
  126. usage();
  127. Binit(&in, 0, OREAD);
  128. Binit(&out, 1, OWRITE);
  129. init_hdefs();
  130. readmtypes();
  131. /* pass through our standard 'From ' line */
  132. passunixheader();
  133. /* parse with the top level part */
  134. part(nil);
  135. exits(0);
  136. }
  137. void
  138. refuse(char *reason)
  139. {
  140. static char msg[] =
  141. "mail refused: we don't accept executable attachments";
  142. postnote(PNGROUP, getpid(), smprint("%s: %s", msg, reason));
  143. exits(msg);
  144. }
  145. /*
  146. * parse a part; returns the ancestor whose boundary terminated
  147. * this part or nil on EOF.
  148. */
  149. static Part*
  150. part(Part *pp)
  151. {
  152. Part *p, *np;
  153. p = mallocz(sizeof *p, 1);
  154. p->pp = pp;
  155. readheader(p);
  156. if(p->boundary != nil){
  157. /* the format of a multipart part is always:
  158. * header
  159. * null or ignored body
  160. * boundary
  161. * header
  162. * body
  163. * boundary
  164. * ...
  165. */
  166. writeheader(p, 1);
  167. np = passbody(p, 1);
  168. if(np != p)
  169. return np;
  170. for(;;){
  171. np = part(p);
  172. if(np != p)
  173. return np;
  174. }
  175. } else {
  176. /* no boundary */
  177. /* may still be multipart if this is a forwarded message */
  178. if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){
  179. /* the format of forwarded message is:
  180. * header
  181. * header
  182. * body
  183. */
  184. writeheader(p, 1);
  185. passnotheader();
  186. return part(p);
  187. } else {
  188. /*
  189. * This is the meat. This may be an executable.
  190. * if so, wrap it and change its type
  191. */
  192. if(p->badtype || p->badfile){
  193. if(p->badfile == 2){
  194. if(savefile != nil)
  195. save(p, savefile);
  196. syslog(0, "vf", "vf rejected %s %s",
  197. p->type? s_to_c(p->type): "?",
  198. p->filename?s_to_c(p->filename):"?");
  199. fprint(2, "The mail contained an executable attachment.\n");
  200. fprint(2, "We refuse all mail containing such.\n");
  201. refuse(nil);
  202. }
  203. np = problemchild(p);
  204. if(np != p)
  205. return np;
  206. /* if problemchild returns p, it turns out p is okay: fall thru */
  207. }
  208. writeheader(p, 1);
  209. return passbody(p, 1);
  210. }
  211. }
  212. }
  213. /*
  214. * read and parse a complete header
  215. */
  216. static void
  217. readheader(Part *p)
  218. {
  219. Hline *hl, **l;
  220. Hdef *hd;
  221. l = &p->hl;
  222. for(;;){
  223. hl = readhl();
  224. if(hl == nil)
  225. break;
  226. *l = hl;
  227. l = &hl->next;
  228. for(hd = hdefs; hd->type != nil; hd++){
  229. if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){
  230. (*hd->f)(p, hd, s_to_c(hl->s));
  231. break;
  232. }
  233. }
  234. }
  235. }
  236. /*
  237. * read a possibly multiline header line
  238. */
  239. static Hline*
  240. readhl(void)
  241. {
  242. Hline *hl;
  243. String *s;
  244. char *p;
  245. int n;
  246. p = Brdline(&in, '\n');
  247. if(p == nil)
  248. return nil;
  249. n = Blinelen(&in);
  250. if(memchr(p, ':', n) == nil){
  251. Bseek(&in, -n, 1);
  252. return nil;
  253. }
  254. s = s_nappend(s_new(), p, n);
  255. for(;;){
  256. p = Brdline(&in, '\n');
  257. if(p == nil)
  258. break;
  259. n = Blinelen(&in);
  260. if(*p != ' ' && *p != '\t'){
  261. Bseek(&in, -n, 1);
  262. break;
  263. }
  264. s = s_nappend(s, p, n);
  265. }
  266. hl = malloc(sizeof *hl);
  267. hl->s = s;
  268. hl->next = nil;
  269. return hl;
  270. }
  271. /*
  272. * write out a complete header
  273. */
  274. static void
  275. writeheader(Part *p, int xfree)
  276. {
  277. Hline *hl, *next;
  278. for(hl = p->hl; hl != nil; hl = next){
  279. Bprint(&out, "%s", s_to_c(hl->s));
  280. if(xfree)
  281. s_free(hl->s);
  282. next = hl->next;
  283. if(xfree)
  284. free(hl);
  285. }
  286. if(xfree)
  287. p->hl = nil;
  288. }
  289. /*
  290. * pass a body through. return if we hit one of our ancestors'
  291. * boundaries or EOF. if we hit a boundary, return a pointer to
  292. * that ancestor. if we hit EOF, return nil.
  293. */
  294. static Part*
  295. passbody(Part *p, int dobound)
  296. {
  297. Part *pp;
  298. Biobuf *b;
  299. char *cp;
  300. for(;;){
  301. if(p->tmpbuf){
  302. b = p->tmpbuf;
  303. cp = Brdline(b, '\n');
  304. if(cp == nil){
  305. Bterm(b);
  306. p->tmpbuf = nil;
  307. goto Stdin;
  308. }
  309. }else{
  310. Stdin:
  311. b = &in;
  312. cp = Brdline(b, '\n');
  313. }
  314. if(cp == nil)
  315. return nil;
  316. for(pp = p; pp != nil; pp = pp->pp)
  317. if(pp->boundary != nil
  318. && strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){
  319. if(dobound)
  320. Bwrite(&out, cp, Blinelen(b));
  321. else
  322. Bseek(b, -Blinelen(b), 1);
  323. return pp;
  324. }
  325. Bwrite(&out, cp, Blinelen(b));
  326. }
  327. }
  328. /*
  329. * save the message somewhere
  330. */
  331. static vlong bodyoff; /* clumsy hack */
  332. static int
  333. save(Part *p, char *file)
  334. {
  335. int fd;
  336. char *cp;
  337. Bterm(&out);
  338. memset(&out, 0, sizeof(out));
  339. fd = open(file, OWRITE);
  340. if(fd < 0)
  341. return -1;
  342. seek(fd, 0, 2);
  343. Binit(&out, fd, OWRITE);
  344. cp = ctime(time(0));
  345. cp[28] = 0;
  346. Bprint(&out, "From virusfilter %s\n", cp);
  347. writeheader(p, 0);
  348. bodyoff = Boffset(&out);
  349. passbody(p, 1);
  350. Bprint(&out, "\n");
  351. Bterm(&out);
  352. close(fd);
  353. memset(&out, 0, sizeof out);
  354. Binit(&out, 1, OWRITE);
  355. return 0;
  356. }
  357. /*
  358. * write to a file but save the fd for passbody.
  359. */
  360. static char*
  361. savetmp(Part *p)
  362. {
  363. char *buf, *name;
  364. int fd;
  365. buf = smprint("%s/vf.XXXXXXXXXXX", UPASTMP);
  366. name = mktemp(buf);
  367. if((fd = create(name, OWRITE|OEXCL, 0666)) < 0){
  368. fprint(2, "%s: error creating temporary file: %r\n", argv0);
  369. refuse("can't create temporary file");
  370. }
  371. free(buf);
  372. close(fd);
  373. if(save(p, name) < 0){
  374. fprint(2, "%s: error saving temporary file: %r\n", argv0);
  375. refuse("can't write temporary file");
  376. }
  377. if(p->tmpbuf){
  378. fprint(2, "%s: error in savetmp: already have tmp file!\n",
  379. argv0);
  380. refuse("already have temporary file");
  381. }
  382. p->tmpbuf = Bopen(name, OREAD|ORCLOSE);
  383. if(p->tmpbuf == nil){
  384. fprint(2, "%s: error reading temporary file: %r\n", argv0);
  385. refuse("error reading temporary file");
  386. }
  387. Bseek(p->tmpbuf, bodyoff, 0);
  388. return strdup(name);
  389. }
  390. /*
  391. * Run the external checker to do content-based checks.
  392. */
  393. static int
  394. runchecker(Part *p)
  395. {
  396. int pid;
  397. char *name;
  398. Waitmsg *w;
  399. if(access("/mail/lib/validateattachment", AEXEC) < 0)
  400. return 0;
  401. name = savetmp(p);
  402. fprint(2, "run checker %s\n", name);
  403. switch(pid = fork()){
  404. case -1:
  405. sysfatal("fork: %r");
  406. case 0:
  407. dup(2, 1);
  408. execl("/mail/lib/validateattachment", "validateattachment", name, nil);
  409. _exits("exec failed");
  410. }
  411. /*
  412. * Okay to return on error - will let mail through but wrapped.
  413. */
  414. w = wait();
  415. if(w == nil){
  416. syslog(0, "mail", "vf wait failed: %r");
  417. return 0;
  418. }
  419. if(w->pid != pid){
  420. syslog(0, "mail", "vf wrong pid %d != %d", w->pid, pid);
  421. return 0;
  422. }
  423. if(p->filename)
  424. name = s_to_c(p->filename);
  425. if(strstr(w->msg, "discard")){
  426. syslog(0, "mail", "vf validateattachment rejected %s", name);
  427. refuse("rejected by validateattachment");
  428. }
  429. if(strstr(w->msg, "accept")){
  430. syslog(0, "mail", "vf validateattachment accepted %s", name);
  431. return 1;
  432. }
  433. free(w);
  434. return 0;
  435. }
  436. /*
  437. * emit a multipart Part that explains the problem
  438. */
  439. static Part*
  440. problemchild(Part *p)
  441. {
  442. Part *np;
  443. Hline *hl;
  444. String *boundary;
  445. char *cp;
  446. /*
  447. * We don't know whether the attachment is okay.
  448. * If there's an external checker, let it have a crack at it.
  449. */
  450. if(runchecker(p) > 0)
  451. return p;
  452. if(justreject)
  453. return p;
  454. fprint(2, "x\n");
  455. syslog(0, "mail", "vf wrapped %s %s", p->type?s_to_c(p->type):"?",
  456. p->filename?s_to_c(p->filename):"?");
  457. fprint(2, "x\n");
  458. boundary = mkboundary();
  459. fprint(2, "x\n");
  460. /* print out non-mime headers */
  461. for(hl = p->hl; hl != nil; hl = hl->next)
  462. if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0)
  463. Bprint(&out, "%s", s_to_c(hl->s));
  464. fprint(2, "x\n");
  465. /* add in our own multipart headers and message */
  466. Bprint(&out, "Content-Type: multipart/mixed;\n");
  467. Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary));
  468. Bprint(&out, "Content-Disposition: inline\n");
  469. Bprint(&out, "\n");
  470. Bprint(&out, "This is a multi-part message in MIME format.\n");
  471. Bprint(&out, "--%s\n", s_to_c(boundary));
  472. Bprint(&out, "Content-Disposition: inline\n");
  473. Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n");
  474. Bprint(&out, "Content-Transfer-Encoding: 7bit\n");
  475. Bprint(&out, "\n");
  476. Bprint(&out, "from postmaster@%s:\n", sysname());
  477. Bprint(&out, "The following attachment had content that we can't\n");
  478. Bprint(&out, "prove to be harmless. To avoid possible automatic\n");
  479. Bprint(&out, "execution, we changed the content headers.\n");
  480. Bprint(&out, "The original header was:\n\n");
  481. /* print out original header lines */
  482. for(hl = p->hl; hl != nil; hl = hl->next)
  483. if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0)
  484. Bprint(&out, "\t%s", s_to_c(hl->s));
  485. Bprint(&out, "--%s\n", s_to_c(boundary));
  486. /* change file name */
  487. if(p->filename)
  488. s_append(p->filename, ".suspect");
  489. else
  490. p->filename = s_copy("file.suspect");
  491. /* print out new header */
  492. Bprint(&out, "Content-Type: application/octet-stream\n");
  493. Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename));
  494. switch(p->encoding){
  495. case Enone:
  496. break;
  497. case Ebase64:
  498. Bprint(&out, "Content-Transfer-Encoding: base64\n");
  499. break;
  500. case Equoted:
  501. Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n");
  502. break;
  503. }
  504. fprint(2, "z\n");
  505. /* pass the body */
  506. np = passbody(p, 0);
  507. fprint(2, "w\n");
  508. /* add the new boundary and the original terminator */
  509. Bprint(&out, "--%s--\n", s_to_c(boundary));
  510. if(np && np->boundary){
  511. cp = Brdline(&in, '\n');
  512. Bwrite(&out, cp, Blinelen(&in));
  513. }
  514. fprint(2, "a %p\n", np);
  515. return np;
  516. }
  /*
   * if the text at *pp starts, case-insensitively, with attr
   * followed by optional blanks and an '=', step *pp past the '='
   * and any blanks after it and return 1; otherwise leave *pp
   * alone and return 0.
   */
  static int
  isattribute(char **pp, char *attr)
  {
      char *q;
      int n;

      q = *pp;
      n = strlen(attr);
      if(cistrncmp(q, attr, n) != 0)
          return 0;

      for(q += n; *q == ' '; q++)
          ;
      if(*q != '=')
          return 0;
      for(q++; *q == ' '; q++)
          ;

      *pp = q;
      return 1;
  }
  536. /*
  537. * parse content type header
  538. */
  539. static void
  540. ctype(Part *p, Hdef *h, char *cp)
  541. {
  542. String *s;
  543. cp += h->len;
  544. cp = skipwhite(cp);
  545. p->type = s_new();
  546. cp = getstring(cp, p->type, 1);
  547. if(badtype(s_to_c(p->type)))
  548. p->badtype = 1;
  549. while(*cp){
  550. if(isattribute(&cp, "boundary")){
  551. s = s_new();
  552. cp = getstring(cp, s, 0);
  553. p->boundary = s_reset(p->boundary);
  554. s_append(p->boundary, "--");
  555. s_append(p->boundary, s_to_c(s));
  556. p->blen = s_len(p->boundary);
  557. s_free(s);
  558. } else if(cistrncmp(cp, "multipart", 9) == 0){
  559. /*
  560. * the first unbounded part of a multipart message,
  561. * the preamble, is not displayed or saved
  562. */
  563. } else if(isattribute(&cp, "name")){
  564. setfilename(p, cp);
  565. } else if(isattribute(&cp, "charset")){
  566. if(p->charset == nil)
  567. p->charset = s_new();
  568. cp = getstring(cp, s_reset(p->charset), 0);
  569. }
  570. cp = skiptosemi(cp);
  571. }
  572. }
  573. /*
  574. * parse content encoding header
  575. */
  576. static void
  577. cencoding(Part *m, Hdef *h, char *p)
  578. {
  579. p += h->len;
  580. p = skipwhite(p);
  581. if(cistrncmp(p, "base64", 6) == 0)
  582. m->encoding = Ebase64;
  583. else if(cistrncmp(p, "quoted-printable", 16) == 0)
  584. m->encoding = Equoted;
  585. }
  586. /*
  587. * parse content disposition header
  588. */
  589. static void
  590. cdisposition(Part *p, Hdef *h, char *cp)
  591. {
  592. cp += h->len;
  593. cp = skipwhite(cp);
  594. while(*cp){
  595. if(cistrncmp(cp, "inline", 6) == 0){
  596. p->disposition = Dinline;
  597. } else if(cistrncmp(cp, "attachment", 10) == 0){
  598. p->disposition = Dfile;
  599. } else if(cistrncmp(cp, "filename=", 9) == 0){
  600. cp += 9;
  601. setfilename(p, cp);
  602. }
  603. cp = skiptosemi(cp);
  604. }
  605. }
  606. static void
  607. setfilename(Part *p, char *name)
  608. {
  609. if(p->filename == nil)
  610. p->filename = s_new();
  611. getstring(name, s_reset(p->filename), 0);
  612. p->filename = tokenconvert(p->filename);
  613. p->badfile = badfile(s_to_c(p->filename));
  614. }
  /*
   * return a pointer to the first character of p that is not
   * white space (possibly its terminating NUL)
   */
  static char*
  skipwhite(char *p)
  {
      /* isspace() wants an unsigned char value; bytes >= 0x80 may be negative as char */
      while(isspace((unsigned char)*p))
          p++;
      return p;
  }
  /*
   * skip to just past the next run of ';' and white space,
   * or to the end of the string if there is no ';'
   */
  static char*
  skiptosemi(char *p)
  {
      while(*p && *p != ';')
          p++;
      /* isspace() wants an unsigned char value; bytes >= 0x80 may be negative as char */
      while(*p == ';' || isspace((unsigned char)*p))
          p++;
      return p;
  }
  631. /*
  632. * parse a possibly "'d string from a header. A
  633. * ';' terminates the string.
  634. */
  635. static char*
  636. getstring(char *p, String *s, int dolower)
  637. {
  638. s = s_reset(s);
  639. p = skipwhite(p);
  640. if(*p == '"'){
  641. p++;
  642. for(;*p && *p != '"'; p++)
  643. if(dolower)
  644. s_putc(s, tolower(*p));
  645. else
  646. s_putc(s, *p);
  647. if(*p == '"')
  648. p++;
  649. s_terminate(s);
  650. return p;
  651. }
  652. for(; *p && !isspace(*p) && *p != ';'; p++)
  653. if(dolower)
  654. s_putc(s, tolower(*p));
  655. else
  656. s_putc(s, *p);
  657. s_terminate(s);
  658. return p;
  659. }
  660. static void
  661. init_hdefs(void)
  662. {
  663. Hdef *hd;
  664. static int already;
  665. if(already)
  666. return;
  667. already = 1;
  668. for(hd = hdefs; hd->type != nil; hd++)
  669. hd->len = strlen(hd->type);
  670. }
  671. /*
  672. * create a new boundary
  673. */
  674. static String*
  675. mkboundary(void)
  676. {
  677. char buf[32];
  678. int i;
  679. static int already;
  680. if(already == 0){
  681. srand((time(0)<<16)|getpid());
  682. already = 1;
  683. }
  684. strcpy(buf, "upas-");
  685. for(i = 5; i < sizeof(buf)-1; i++)
  686. buf[i] = 'a' + nrand(26);
  687. buf[i] = 0;
  688. return s_copy(buf);
  689. }
  690. /*
  691. * skip blank lines till header
  692. */
  693. static void
  694. passnotheader(void)
  695. {
  696. char *cp;
  697. int i, n;
  698. while((cp = Brdline(&in, '\n')) != nil){
  699. n = Blinelen(&in);
  700. for(i = 0; i < n-1; i++)
  701. if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){
  702. Bseek(&in, -n, 1);
  703. return;
  704. }
  705. Bwrite(&out, cp, n);
  706. }
  707. }
  708. /*
  709. * pass unix header lines
  710. */
  711. static void
  712. passunixheader(void)
  713. {
  714. char *p;
  715. int n;
  716. while((p = Brdline(&in, '\n')) != nil){
  717. n = Blinelen(&in);
  718. if(strncmp(p, "From ", 5) != 0){
  719. Bseek(&in, -n, 1);
  720. break;
  721. }
  722. Bwrite(&out, p, n);
  723. }
  724. }
  725. /*
  726. * Read mime types
  727. */
  728. static void
  729. readmtypes(void)
  730. {
  731. Biobuf *b;
  732. char *p;
  733. char *f[6];
  734. Mtype *m;
  735. Mtype **l;
  736. b = Bopen("/sys/lib/mimetype", OREAD);
  737. if(b == nil)
  738. return;
  739. l = &mtypes;
  740. while((p = Brdline(b, '\n')) != nil){
  741. if(*p == '#')
  742. continue;
  743. p[Blinelen(b)-1] = 0;
  744. if(tokenize(p, f, nelem(f)) < 5)
  745. continue;
  746. m = mallocz(sizeof *m, 1);
  747. if(m == nil)
  748. goto err;
  749. m->ext = strdup(f[0]);
  750. if(m->ext == 0)
  751. goto err;
  752. m->gtype = strdup(f[1]);
  753. if(m->gtype == 0)
  754. goto err;
  755. m->stype = strdup(f[2]);
  756. if(m->stype == 0)
  757. goto err;
  758. m->class = *f[4];
  759. *l = m;
  760. l = &(m->next);
  761. }
  762. Bterm(b);
  763. return;
  764. err:
  765. if(m == nil)
  766. return;
  767. free(m->ext);
  768. free(m->gtype);
  769. free(m->stype);
  770. free(m);
  771. Bterm(b);
  772. }
  773. /*
  774. * if the class is 'm' or 'y', accept it
  775. * if the class is 'p' check a previous extension
  776. * otherwise, filename is bad
  777. */
  778. static int
  779. badfile(char *name)
  780. {
  781. char *p;
  782. Mtype *m;
  783. int rv;
  784. p = strrchr(name, '.');
  785. if(p == nil)
  786. return 0;
  787. for(m = mtypes; m != nil; m = m->next)
  788. if(cistrcmp(p, m->ext) == 0){
  789. switch(m->class){
  790. case 'm':
  791. case 'y':
  792. return 0;
  793. case 'p':
  794. *p = 0;
  795. rv = badfile(name);
  796. *p = '.';
  797. return rv;
  798. case 'r':
  799. return 2;
  800. }
  801. }
  802. return 1;
  803. }
  804. /*
  805. * if the class is 'm' or 'y' or 'p', accept it
  806. * otherwise, filename is bad
  807. */
  808. static int
  809. badtype(char *type)
  810. {
  811. Mtype *m;
  812. char *s, *fix;
  813. int rv = 1;
  814. fix = s = strchr(type, '/');
  815. if(s != nil)
  816. *s++ = 0;
  817. else
  818. s = "-";
  819. for(m = mtypes; m != nil; m = m->next){
  820. if(cistrcmp(type, m->gtype) != 0)
  821. continue;
  822. if(cistrcmp(s, m->stype) != 0)
  823. continue;
  824. switch(m->class){
  825. case 'y':
  826. case 'p':
  827. case 'm':
  828. rv = 0;
  829. break;
  830. }
  831. break;
  832. }
  833. if(fix != nil)
  834. *fix = '/';
  835. return rv;
  836. }
  /*
   * rfc2047 non-ascii: the character sets tokenconvert() understands
   */
  typedef struct Charset Charset;
  struct Charset {
      char *name;     /* character set name as it appears in the token */
      int len;        /* length of name */
      int convert;    /* 1: run the text through latin1toutf(), 0: copy as is */
  };

  Charset charsets[] =
  {
      { "us-ascii",   8,  1, },
      { "utf-8",      5,  0, },
      { "iso-8859-1", 10, 1, },
  };
  849. /*
  850. * convert to UTF if need be
  851. */
  852. static String*
  853. tokenconvert(String *t)
  854. {
  855. String *s;
  856. char decoded[1024];
  857. char utfbuf[2*1024];
  858. int i, len;
  859. char *e;
  860. char *token;
  861. token = s_to_c(t);
  862. len = s_len(t);
  863. if(token[0] != '=' || token[1] != '?' ||
  864. token[len-2] != '?' || token[len-1] != '=')
  865. goto err;
  866. e = token+len-2;
  867. token += 2;
  868. /* bail if we don't understand the character set */
  869. for(i = 0; i < nelem(charsets); i++)
  870. if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0)
  871. if(token[charsets[i].len] == '?'){
  872. token += charsets[i].len + 1;
  873. break;
  874. }
  875. if(i >= nelem(charsets))
  876. goto err;
  877. /* bail if it doesn't fit */
  878. if(strlen(token) > sizeof(decoded)-1)
  879. goto err;
  880. /* bail if we don't understand the encoding */
  881. if(cistrncmp(token, "b?", 2) == 0){
  882. token += 2;
  883. len = dec64((uchar*)decoded, sizeof(decoded), token, e-token);
  884. decoded[len] = 0;
  885. } else if(cistrncmp(token, "q?", 2) == 0){
  886. token += 2;
  887. len = decquoted(decoded, token, e);
  888. if(len > 0 && decoded[len-1] == '\n')
  889. len--;
  890. decoded[len] = 0;
  891. } else
  892. goto err;
  893. s = nil;
  894. switch(charsets[i].convert){
  895. case 0:
  896. s = s_copy(decoded);
  897. break;
  898. case 1:
  899. s = s_new();
  900. latin1toutf(utfbuf, decoded, decoded+len);
  901. s_append(s, utfbuf);
  902. break;
  903. }
  904. return s;
  905. err:
  906. return s_clone(t);
  907. }
  908. /*
  909. * decode quoted
  910. */
  911. enum
  912. {
  913. Self= 1,
  914. Hex= 2,
  915. };
  916. uchar tableqp[256];
  917. static void
  918. initquoted(void)
  919. {
  920. int c;
  921. memset(tableqp, 0, 256);
  922. for(c = ' '; c <= '<'; c++)
  923. tableqp[c] = Self;
  924. for(c = '>'; c <= '~'; c++)
  925. tableqp[c] = Self;
  926. tableqp['\t'] = Self;
  927. tableqp['='] = Hex;
  928. }
  /*
   * value of a hexadecimal digit in either case;
   * 0 for anything that is not one
   */
  static int
  hex2int(int x)
  {
      int v;

      if('0' <= x && x <= '9')
          v = x - '0';
      else if('A' <= x && x <= 'F')
          v = 10 + (x - 'A');
      else if('a' <= x && x <= 'f')
          v = 10 + (x - 'a');
      else
          v = 0;
      return v;
  }
  940. static char*
  941. decquotedline(char *out, char *in, char *e)
  942. {
  943. int c, soft;
  944. /* dump trailing white space */
  945. while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
  946. e--;
  947. /* trailing '=' means no newline */
  948. if(*e == '='){
  949. soft = 1;
  950. e--;
  951. } else
  952. soft = 0;
  953. while(in <= e){
  954. c = (*in++) & 0xff;
  955. switch(tableqp[c]){
  956. case Self:
  957. *out++ = c;
  958. break;
  959. case Hex:
  960. c = hex2int(*in++)<<4;
  961. c |= hex2int(*in++);
  962. *out++ = c;
  963. break;
  964. }
  965. }
  966. if(!soft)
  967. *out++ = '\n';
  968. *out = 0;
  969. return out;
  970. }
  971. static int
  972. decquoted(char *out, char *in, char *e)
  973. {
  974. char *p, *nl;
  975. if(tableqp[' '] == 0)
  976. initquoted();
  977. p = out;
  978. while((nl = strchr(in, '\n')) != nil && nl < e){
  979. p = decquotedline(p, in, nl);
  980. in = nl + 1;
  981. }
  982. if(in < e)
  983. p = decquotedline(p, in, e-1);
  984. /* make sure we end with a new line */
  985. if(*(p-1) != '\n'){
  986. *p++ = '\n';
  987. *p = 0;
  988. }
  989. return p - out;
  990. }
  991. /* translate latin1 directly since it fits neatly in utf */
  992. static int
  993. latin1toutf(char *out, char *in, char *e)
  994. {
  995. Rune r;
  996. char *p;
  997. p = out;
  998. for(; in < e; in++){
  999. r = (*in) & 0xff;
  1000. p += runetochar(p, &r);
  1001. }
  1002. *p = 0;
  1003. return p - out;
  1004. }