/*
 * vf.c
 */
  1. #include "common.h"
  2. #include <ctype.h>
  3. Biobuf in;
  4. Biobuf out;
  5. typedef struct Mtype Mtype;
  6. typedef struct Hdef Hdef;
  7. typedef struct Hline Hline;
  8. typedef struct Part Part;
  9. static int badfile(char *name);
  10. static int badtype(char *type);
  11. static void ctype(Part*, Hdef*, char*);
  12. static void cencoding(Part*, Hdef*, char*);
  13. static void cdisposition(Part*, Hdef*, char*);
  14. static int decquoted(char *out, char *in, char *e);
  15. static char* getstring(char *p, String *s, int dolower);
  16. static void init_hdefs(void);
  17. static int isattribute(char **pp, char *attr);
  18. static int latin1toutf(char *out, char *in, char *e);
  19. static String* mkboundary(void);
  20. static Part* part(Part *pp);
  21. static Part* passbody(Part *p, int dobound);
  22. static void passnotheader(void);
  23. static void passunixheader(void);
  24. static Part* problemchild(Part *p);
  25. static void readheader(Part *p);
  26. static Hline* readhl(void);
  27. static void readmtypes(void);
  28. static void setfilename(Part *p, char *name);
  29. static char* skiptosemi(char *p);
  30. static char* skipwhite(char *p);
  31. static String* tokenconvert(String *t);
  32. static void writeheader(Part *p);
  33. enum
  34. {
  35. // encodings
  36. Enone= 0,
  37. Ebase64,
  38. Equoted,
  39. // disposition possibilities
  40. Dnone= 0,
  41. Dinline,
  42. Dfile,
  43. Dignore,
  44. PAD64= '=',
  45. };
  46. /*
  47. * a message part; either the whole message or a subpart
  48. */
  49. struct Part
  50. {
  51. Part *pp; /* parent part */
  52. Hline *hl; /* linked list of header lines */
  53. int disposition;
  54. int encoding;
  55. int badfile;
  56. int badtype;
  57. String *boundary; /* boundary for multiparts */
  58. int blen;
  59. String *charset; /* character set */
  60. String *type; /* content type */
  61. String *filename; /* content type */
  62. };
  63. /*
  64. * a (multi)line header
  65. */
  66. struct Hline
  67. {
  68. Hline *next;
  69. String *s;
  70. };
  71. /*
  72. * header definitions for parsing
  73. */
  74. struct Hdef
  75. {
  76. char *type;
  77. void (*f)(Part*, Hdef*, char*);
  78. int len;
  79. };
  80. Hdef hdefs[] =
  81. {
  82. { "content-type:", ctype, },
  83. { "content-transfer-encoding:", cencoding, },
  84. { "content-disposition:", cdisposition, },
  85. { 0, },
  86. };
  87. /*
  88. * acceptable content types and their extensions
  89. */
  90. struct Mtype {
  91. Mtype *next;
  92. char *ext; /* extension */
  93. char *gtype; /* generic content type */
  94. char *stype; /* specific content type */
  95. char class;
  96. };
  97. Mtype *mtypes;
  98. int justreject;
  99. /*
  100. * this is a filter that changes mime types and names of
  101. * suspect attachments.
  102. *
  103. */
  104. void
  105. main(int argc, char **argv)
  106. {
  107. ARGBEGIN{
  108. case 'r':
  109. justreject = 1;
  110. break;
  111. }ARGEND;
  112. Binit(&in, 0, OREAD);
  113. Binit(&out, 1, OWRITE);
  114. init_hdefs();
  115. readmtypes();
  116. /* pass through our standard 'From ' line */
  117. passunixheader();
  118. /* parse with the top level part */
  119. part(nil);
  120. exits(0);
  121. }
  122. /*
  123. * parse a part; returns the ancestor whose boundary terminated
  124. * this part or nil on EOF.
  125. */
  126. static Part*
  127. part(Part *pp)
  128. {
  129. Part *p, *np;
  130. p = mallocz(sizeof *p, 1);
  131. p->pp = pp;
  132. readheader(p);
  133. if(p->boundary != nil){
  134. /* the format of a multipart part is always:
  135. * header
  136. * null or ignored body
  137. * boundary
  138. * header
  139. * body
  140. * boundary
  141. * ...
  142. */
  143. writeheader(p);
  144. np = passbody(p, 1);
  145. if(np != p)
  146. return np;
  147. for(;;){
  148. np = part(p);
  149. if(np != p)
  150. return np;
  151. }
  152. } else {
  153. /* no boundary */
  154. /* may still be multipart if this is a forwarded message */
  155. if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){
  156. /* the format of forwarded message is:
  157. * header
  158. * header
  159. * body
  160. */
  161. writeheader(p);
  162. passnotheader();
  163. return part(p);
  164. } else {
  165. /* This is the meat. This may be an executable.
  166. * if so, wrap it and change its type
  167. */
  168. if(p->badtype || p->badfile){
  169. if(p->badfile == 2){
  170. syslog(0, "vf", "vf rejected %s %s", p->type?s_to_c(p->type):"?",
  171. p->filename?s_to_c(p->filename):"?");
  172. fprint(2, "The mail contained an attachment which was a DOS/Windows\n");
  173. fprint(2, "executable file. We refuse all mail containing such.\n");
  174. postnote(PNGROUP, getpid(), "mail refused: we don't accept executable attachments");
  175. exits("we don't accept executable attachments");
  176. }
  177. return problemchild(p);
  178. } else {
  179. writeheader(p);
  180. return passbody(p, 1);
  181. }
  182. }
  183. }
  184. }
  185. /*
  186. * read and parse a complete header
  187. */
  188. static void
  189. readheader(Part *p)
  190. {
  191. Hline *hl, **l;
  192. Hdef *hd;
  193. l = &p->hl;
  194. for(;;){
  195. hl = readhl();
  196. if(hl == nil)
  197. break;
  198. *l = hl;
  199. l = &hl->next;
  200. for(hd = hdefs; hd->type != nil; hd++){
  201. if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){
  202. (*hd->f)(p, hd, s_to_c(hl->s));
  203. break;
  204. }
  205. }
  206. }
  207. }
  208. /*
  209. * read a possibly multiline header line
  210. */
  211. static Hline*
  212. readhl(void)
  213. {
  214. Hline *hl;
  215. String *s;
  216. char *p;
  217. int n;
  218. p = Brdline(&in, '\n');
  219. if(p == nil)
  220. return nil;
  221. n = Blinelen(&in);
  222. if(memchr(p, ':', n) == nil){
  223. Bseek(&in, -n, 1);
  224. return nil;
  225. }
  226. s = s_nappend(s_new(), p, n);
  227. for(;;){
  228. p = Brdline(&in, '\n');
  229. if(p == nil)
  230. break;
  231. n = Blinelen(&in);
  232. if(*p != ' ' && *p != '\t'){
  233. Bseek(&in, -n, 1);
  234. break;
  235. }
  236. s = s_nappend(s, p, n);
  237. }
  238. hl = malloc(sizeof *hl);
  239. hl->s = s;
  240. hl->next = nil;
  241. return hl;
  242. }
  243. /*
  244. * write out a complete header
  245. */
  246. static void
  247. writeheader(Part *p)
  248. {
  249. Hline *hl, *next;
  250. for(hl = p->hl; hl != nil; hl = next){
  251. Bprint(&out, "%s", s_to_c(hl->s));
  252. s_free(hl->s);
  253. next = hl->next;
  254. free(hl);
  255. }
  256. p->hl = nil;
  257. }
  258. /*
  259. * pass a body through. return if we hit one of our ancestors'
  260. * boundaries or EOF. if we hit a boundary, return a pointer to
  261. * that ancestor. if we hit EOF, return nil.
  262. */
  263. static Part*
  264. passbody(Part *p, int dobound)
  265. {
  266. Part *pp;
  267. char *cp;
  268. for(;;){
  269. cp = Brdline(&in, '\n');
  270. if(cp == nil)
  271. return nil;
  272. for(pp = p; pp != nil; pp = pp->pp)
  273. if(pp->boundary != nil
  274. && strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){
  275. if(dobound)
  276. Bwrite(&out, cp, Blinelen(&in));
  277. else
  278. Bseek(&in, -Blinelen(&in), 1);
  279. return pp;
  280. }
  281. Bwrite(&out, cp, Blinelen(&in));
  282. }
  283. return nil;
  284. }
  285. /*
  286. * emit a multipart Part that explains the problem
  287. */
  288. static Part*
  289. problemchild(Part *p)
  290. {
  291. Part *np;
  292. Hline *hl;
  293. String *boundary;
  294. char *cp;
  295. syslog(0, "mail", "vf wrapped %s %s", p->type?s_to_c(p->type):"?",
  296. p->filename?s_to_c(p->filename):"?");
  297. boundary = mkboundary();
  298. /* print out non-mime headers */
  299. for(hl = p->hl; hl != nil; hl = hl->next)
  300. if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0)
  301. Bprint(&out, "%s", s_to_c(hl->s));
  302. /* add in out own multipart headers and message */
  303. Bprint(&out, "Content-Type: multipart/mixed;\n");
  304. Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary));
  305. Bprint(&out, "Content-Disposition: inline\n");
  306. Bprint(&out, "\n");
  307. Bprint(&out, "This is a multi-part message in MIME format.\n");
  308. Bprint(&out, "--%s\n", s_to_c(boundary));
  309. Bprint(&out, "Content-Disposition: inline\n");
  310. Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n");
  311. Bprint(&out, "Content-Transfer-Encoding: 7bit\n");
  312. Bprint(&out, "\n");
  313. Bprint(&out, "The following attachment had content that we can't\n");
  314. Bprint(&out, "prove to be harmless. To avoid possible automatic\n");
  315. Bprint(&out, "execution, we changed the content headers.\n");
  316. Bprint(&out, "The original header was:\n\n");
  317. /* print out original header lines */
  318. for(hl = p->hl; hl != nil; hl = hl->next)
  319. if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0)
  320. Bprint(&out, "\t%s", s_to_c(hl->s));
  321. Bprint(&out, "--%s\n", s_to_c(boundary));
  322. /* change file name */
  323. if(p->filename)
  324. s_append(p->filename, ".suspect");
  325. else
  326. p->filename = s_copy("file.suspect");
  327. /* print out new header */
  328. Bprint(&out, "Content-Type: application/octet-stream\n");
  329. Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename));
  330. switch(p->encoding){
  331. case Enone:
  332. break;
  333. case Ebase64:
  334. Bprint(&out, "Content-Transfer-Encoding: base64\n");
  335. break;
  336. case Equoted:
  337. Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n");
  338. break;
  339. }
  340. /* pass the body */
  341. np = passbody(p, 0);
  342. /* add the new boundary and the original terminator */
  343. Bprint(&out, "--%s--\n", s_to_c(boundary));
  344. if(np && np->boundary){
  345. cp = Brdline(&in, '\n');
  346. Bwrite(&out, cp, Blinelen(&in));
  347. }
  348. return np;
  349. }
  350. static int
  351. isattribute(char **pp, char *attr)
  352. {
  353. char *p;
  354. int n;
  355. n = strlen(attr);
  356. p = *pp;
  357. if(cistrncmp(p, attr, n) != 0)
  358. return 0;
  359. p += n;
  360. while(*p == ' ')
  361. p++;
  362. if(*p++ != '=')
  363. return 0;
  364. while(*p == ' ')
  365. p++;
  366. *pp = p;
  367. return 1;
  368. }
  369. /*
  370. * parse content type header
  371. */
  372. static void
  373. ctype(Part *p, Hdef *h, char *cp)
  374. {
  375. String *s;
  376. cp += h->len;
  377. cp = skipwhite(cp);
  378. p->type = s_new();
  379. cp = getstring(cp, p->type, 1);
  380. if(badtype(s_to_c(p->type)))
  381. p->badtype = 1;
  382. while(*cp){
  383. if(isattribute(&cp, "boundary")){
  384. s = s_new();
  385. cp = getstring(cp, s, 0);
  386. p->boundary = s_reset(p->boundary);
  387. s_append(p->boundary, "--");
  388. s_append(p->boundary, s_to_c(s));
  389. p->blen = s_len(p->boundary);
  390. s_free(s);
  391. } else if(cistrncmp(cp, "multipart", 9) == 0){
  392. /*
  393. * the first unbounded part of a multipart message,
  394. * the preamble, is not displayed or saved
  395. */
  396. } else if(isattribute(&cp, "name")){
  397. setfilename(p, cp);
  398. } else if(isattribute(&cp, "charset")){
  399. if(p->charset == nil)
  400. p->charset = s_new();
  401. cp = getstring(cp, s_reset(p->charset), 0);
  402. }
  403. cp = skiptosemi(cp);
  404. }
  405. }
  406. /*
  407. * parse content encoding header
  408. */
  409. static void
  410. cencoding(Part *m, Hdef *h, char *p)
  411. {
  412. p += h->len;
  413. p = skipwhite(p);
  414. if(cistrncmp(p, "base64", 6) == 0)
  415. m->encoding = Ebase64;
  416. else if(cistrncmp(p, "quoted-printable", 16) == 0)
  417. m->encoding = Equoted;
  418. }
  419. /*
  420. * parse content disposition header
  421. */
  422. static void
  423. cdisposition(Part *p, Hdef *h, char *cp)
  424. {
  425. cp += h->len;
  426. cp = skipwhite(cp);
  427. while(*cp){
  428. if(cistrncmp(cp, "inline", 6) == 0){
  429. p->disposition = Dinline;
  430. } else if(cistrncmp(cp, "attachment", 10) == 0){
  431. p->disposition = Dfile;
  432. } else if(cistrncmp(cp, "filename=", 9) == 0){
  433. cp += 9;
  434. setfilename(p, cp);
  435. }
  436. cp = skiptosemi(cp);
  437. }
  438. }
  439. static void
  440. setfilename(Part *p, char *name)
  441. {
  442. if(p->filename == nil)
  443. p->filename = s_new();
  444. getstring(name, s_reset(p->filename), 0);
  445. p->filename = tokenconvert(p->filename);
  446. p->badfile = badfile(s_to_c(p->filename));
  447. }
  448. static char*
  449. skipwhite(char *p)
  450. {
  451. while(isspace(*p))
  452. p++;
  453. return p;
  454. }
  455. static char*
  456. skiptosemi(char *p)
  457. {
  458. while(*p && *p != ';')
  459. p++;
  460. while(*p == ';' || isspace(*p))
  461. p++;
  462. return p;
  463. }
  464. /*
  465. * parse a possibly "'d string from a header. A
  466. * ';' terminates the string.
  467. */
  468. static char*
  469. getstring(char *p, String *s, int dolower)
  470. {
  471. s = s_reset(s);
  472. p = skipwhite(p);
  473. if(*p == '"'){
  474. p++;
  475. for(;*p && *p != '"'; p++)
  476. if(dolower)
  477. s_putc(s, tolower(*p));
  478. else
  479. s_putc(s, *p);
  480. if(*p == '"')
  481. p++;
  482. s_terminate(s);
  483. return p;
  484. }
  485. for(; *p && !isspace(*p) && *p != ';'; p++)
  486. if(dolower)
  487. s_putc(s, tolower(*p));
  488. else
  489. s_putc(s, *p);
  490. s_terminate(s);
  491. return p;
  492. }
  493. static void
  494. init_hdefs(void)
  495. {
  496. Hdef *hd;
  497. static int already;
  498. if(already)
  499. return;
  500. already = 1;
  501. for(hd = hdefs; hd->type != nil; hd++)
  502. hd->len = strlen(hd->type);
  503. }
  504. /*
  505. * create a new boundary
  506. */
  507. static String*
  508. mkboundary(void)
  509. {
  510. char buf[32];
  511. int i;
  512. static int already;
  513. if(already == 0){
  514. srand((time(0)<<16)|getpid());
  515. already = 1;
  516. }
  517. strcpy(buf, "upas-");
  518. for(i = 5; i < sizeof(buf)-1; i++)
  519. buf[i] = 'a' + nrand(26);
  520. buf[i] = 0;
  521. return s_copy(buf);
  522. }
  523. /*
  524. * skip blank lines till header
  525. */
  526. static void
  527. passnotheader(void)
  528. {
  529. char *cp;
  530. int i, n;
  531. while((cp = Brdline(&in, '\n')) != nil){
  532. n = Blinelen(&in);
  533. for(i = 0; i < n-1; i++)
  534. if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){
  535. Bseek(&in, -n, 1);
  536. return;
  537. }
  538. Bwrite(&out, cp, n);
  539. }
  540. }
  541. /*
  542. * pass unix header lines
  543. */
  544. static void
  545. passunixheader(void)
  546. {
  547. char *p;
  548. int n;
  549. while((p = Brdline(&in, '\n')) != nil){
  550. n = Blinelen(&in);
  551. if(strncmp(p, "From ", 5) != 0){
  552. Bseek(&in, -n, 1);
  553. break;
  554. }
  555. Bwrite(&out, p, n);
  556. }
  557. }
  558. /*
  559. * Read mime types
  560. */
  561. static void
  562. readmtypes(void)
  563. {
  564. Biobuf *b;
  565. char *p;
  566. char *f[6];
  567. Mtype *m;
  568. Mtype **l;
  569. b = Bopen("/sys/lib/mimetype", OREAD);
  570. if(b == nil)
  571. return;
  572. l = &mtypes;
  573. while((p = Brdline(b, '\n')) != nil){
  574. if(*p == '#')
  575. continue;
  576. p[Blinelen(b)-1] = 0;
  577. if(tokenize(p, f, nelem(f)) < 5)
  578. continue;
  579. m = mallocz(sizeof *m, 1);
  580. if(m == nil)
  581. goto err;
  582. m->ext = strdup(f[0]);
  583. if(m->ext == 0)
  584. goto err;
  585. m->gtype = strdup(f[1]);
  586. if(m->gtype == 0)
  587. goto err;
  588. m->stype = strdup(f[2]);
  589. if(m->stype == 0)
  590. goto err;
  591. m->class = *f[4];
  592. *l = m;
  593. l = &(m->next);
  594. }
  595. Bterm(b);
  596. return;
  597. err:
  598. if(m == nil)
  599. return;
  600. free(m->ext);
  601. free(m->gtype);
  602. free(m->stype);
  603. free(m);
  604. Bterm(b);
  605. }
  606. /*
  607. * if the class is 'm' or 'y', accept it
  608. * if the class is 'p' check a previous extension
  609. * otherwise, filename is bad
  610. */
  611. static int
  612. badfile(char *name)
  613. {
  614. char *p;
  615. Mtype *m;
  616. int rv;
  617. p = strrchr(name, '.');
  618. if(p == nil)
  619. return 0;
  620. for(m = mtypes; m != nil; m = m->next)
  621. if(cistrcmp(p, m->ext) == 0){
  622. switch(m->class){
  623. case 'm':
  624. case 'y':
  625. return 0;
  626. case 'p':
  627. *p = 0;
  628. rv = badfile(name);
  629. *p = '.';
  630. return rv;
  631. case 'r':
  632. return 2;
  633. }
  634. }
  635. if(justreject)
  636. return 0;
  637. return 1;
  638. }
  639. /*
  640. * if the class is 'm' or 'y' or 'p', accept it
  641. * otherwise, filename is bad
  642. */
  643. static int
  644. badtype(char *type)
  645. {
  646. Mtype *m;
  647. char *s, *fix;
  648. int rv = 1;
  649. if(justreject)
  650. return 0;
  651. fix = s = strchr(type, '/');
  652. if(s != nil)
  653. *s++ = 0;
  654. else
  655. s = "-";
  656. for(m = mtypes; m != nil; m = m->next){
  657. if(cistrcmp(type, m->gtype) != 0)
  658. continue;
  659. if(cistrcmp(s, m->stype) != 0)
  660. continue;
  661. switch(m->class){
  662. case 'y':
  663. case 'p':
  664. case 'm':
  665. rv = 0;
  666. break;
  667. }
  668. break;
  669. }
  670. if(fix != nil)
  671. *fix = '/';
  672. return rv;
  673. }
  674. /* rfc2047 non-ascii */
  675. typedef struct Charset Charset;
  676. struct Charset {
  677. char *name;
  678. int len;
  679. int convert;
  680. } charsets[] =
  681. {
  682. { "us-ascii", 8, 1, },
  683. { "utf-8", 5, 0, },
  684. { "iso-8859-1", 10, 1, },
  685. };
  686. /*
  687. * convert to UTF if need be
  688. */
  689. static String*
  690. tokenconvert(String *t)
  691. {
  692. String *s;
  693. char decoded[1024];
  694. char utfbuf[2*1024];
  695. int i, len;
  696. char *e;
  697. char *token;
  698. token = s_to_c(t);
  699. len = s_len(t);
  700. if(token[0] != '=' || token[1] != '?' ||
  701. token[len-2] != '?' || token[len-1] != '=')
  702. goto err;
  703. e = token+len-2;
  704. token += 2;
  705. // bail if we don't understand the character set
  706. for(i = 0; i < nelem(charsets); i++)
  707. if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0)
  708. if(token[charsets[i].len] == '?'){
  709. token += charsets[i].len + 1;
  710. break;
  711. }
  712. if(i >= nelem(charsets))
  713. goto err;
  714. // bail if it doesn't fit
  715. if(strlen(token) > sizeof(decoded)-1)
  716. goto err;
  717. // bail if we don't understand the encoding
  718. if(cistrncmp(token, "b?", 2) == 0){
  719. token += 2;
  720. len = dec64((uchar*)decoded, sizeof(decoded), token, e-token);
  721. decoded[len] = 0;
  722. } else if(cistrncmp(token, "q?", 2) == 0){
  723. token += 2;
  724. len = decquoted(decoded, token, e);
  725. if(len > 0 && decoded[len-1] == '\n')
  726. len--;
  727. decoded[len] = 0;
  728. } else
  729. goto err;
  730. s = nil;
  731. switch(charsets[i].convert){
  732. case 0:
  733. s = s_copy(decoded);
  734. break;
  735. case 1:
  736. s = s_new();
  737. latin1toutf(utfbuf, decoded, decoded+len);
  738. s_append(s, utfbuf);
  739. break;
  740. }
  741. return s;
  742. err:
  743. return s_clone(t);
  744. }
  745. /*
  746. * decode quoted
  747. */
  748. enum
  749. {
  750. Self= 1,
  751. Hex= 2,
  752. };
  753. uchar tableqp[256];
  754. static void
  755. initquoted(void)
  756. {
  757. int c;
  758. memset(tableqp, 0, 256);
  759. for(c = ' '; c <= '<'; c++)
  760. tableqp[c] = Self;
  761. for(c = '>'; c <= '~'; c++)
  762. tableqp[c] = Self;
  763. tableqp['\t'] = Self;
  764. tableqp['='] = Hex;
  765. }
  766. static int
  767. hex2int(int x)
  768. {
  769. if(x >= '0' && x <= '9')
  770. return x - '0';
  771. if(x >= 'A' && x <= 'F')
  772. return (x - 'A') + 10;
  773. if(x >= 'a' && x <= 'f')
  774. return (x - 'a') + 10;
  775. return 0;
  776. }
  777. static char*
  778. decquotedline(char *out, char *in, char *e)
  779. {
  780. int c, soft;
  781. /* dump trailing white space */
  782. while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
  783. e--;
  784. /* trailing '=' means no newline */
  785. if(*e == '='){
  786. soft = 1;
  787. e--;
  788. } else
  789. soft = 0;
  790. while(in <= e){
  791. c = (*in++) & 0xff;
  792. switch(tableqp[c]){
  793. case Self:
  794. *out++ = c;
  795. break;
  796. case Hex:
  797. c = hex2int(*in++)<<4;
  798. c |= hex2int(*in++);
  799. *out++ = c;
  800. break;
  801. }
  802. }
  803. if(!soft)
  804. *out++ = '\n';
  805. *out = 0;
  806. return out;
  807. }
  808. static int
  809. decquoted(char *out, char *in, char *e)
  810. {
  811. char *p, *nl;
  812. if(tableqp[' '] == 0)
  813. initquoted();
  814. p = out;
  815. while((nl = strchr(in, '\n')) != nil && nl < e){
  816. p = decquotedline(p, in, nl);
  817. in = nl + 1;
  818. }
  819. if(in < e)
  820. p = decquotedline(p, in, e-1);
  821. // make sure we end with a new line
  822. if(*(p-1) != '\n'){
  823. *p++ = '\n';
  824. *p = 0;
  825. }
  826. return p - out;
  827. }
  828. /* translate latin1 directly since it fits neatly in utf */
  829. static int
  830. latin1toutf(char *out, char *in, char *e)
  831. {
  832. Rune r;
  833. char *p;
  834. p = out;
  835. for(; in < e; in++){
  836. r = (*in) & 0xff;
  837. p += runetochar(p, &r);
  838. }
  839. *p = 0;
  840. return p - out;
  841. }