/* vf.c: a filter that changes mime types and names of suspect attachments */
  1. #include "common.h"
  2. #include <ctype.h>
  3. Biobuf in;
  4. Biobuf out;
  5. typedef struct Mtype Mtype;
  6. typedef struct Hdef Hdef;
  7. typedef struct Hline Hline;
  8. typedef struct Part Part;
  9. static int badfile(char *name);
  10. static int badtype(char *type);
  11. static void ctype(Part*, Hdef*, char*);
  12. static void cencoding(Part*, Hdef*, char*);
  13. static void cdisposition(Part*, Hdef*, char*);
  14. static int decquoted(char *out, char *in, char *e);
  15. static char* getstring(char *p, String *s, int dolower);
  16. static void init_hdefs(void);
  17. static int isattribute(char **pp, char *attr);
  18. static int latin1toutf(char *out, char *in, char *e);
  19. static String* mkboundary(void);
  20. static Part* part(Part *pp);
  21. static Part* passbody(Part *p, int dobound);
  22. static void passnotheader(void);
  23. static void passunixheader(void);
  24. static Part* problemchild(Part *p);
  25. static void readheader(Part *p);
  26. static Hline* readhl(void);
  27. static void readmtypes(void);
  28. static void setfilename(Part *p, char *name);
  29. static char* skiptosemi(char *p);
  30. static char* skipwhite(char *p);
  31. static String* tokenconvert(String *t);
  32. static void writeheader(Part *p);
  33. enum
  34. {
  35. // encodings
  36. Enone= 0,
  37. Ebase64,
  38. Equoted,
  39. // disposition possibilities
  40. Dnone= 0,
  41. Dinline,
  42. Dfile,
  43. Dignore,
  44. PAD64= '=',
  45. };
  46. /*
  47. * a message part; either the whole message or a subpart
  48. */
  49. struct Part
  50. {
  51. Part *pp; /* parent part */
  52. Hline *hl; /* linked list of header lines */
  53. int disposition;
  54. int encoding;
  55. int badfile;
  56. int badtype;
  57. String *boundary; /* boundary for multiparts */
  58. int blen;
  59. String *charset; /* character set */
  60. String *type; /* content type */
  61. String *filename; /* content type */
  62. };
  63. /*
  64. * a (multi)line header
  65. */
  66. struct Hline
  67. {
  68. Hline *next;
  69. String *s;
  70. };
  71. /*
  72. * header definitions for parsing
  73. */
  74. struct Hdef
  75. {
  76. char *type;
  77. void (*f)(Part*, Hdef*, char*);
  78. int len;
  79. };
  80. Hdef hdefs[] =
  81. {
  82. { "content-type:", ctype, },
  83. { "content-transfer-encoding:", cencoding, },
  84. { "content-disposition:", cdisposition, },
  85. { 0, },
  86. };
  87. /*
  88. * acceptable content types and their extensions
  89. */
  90. struct Mtype {
  91. Mtype *next;
  92. char *ext; /* extension */
  93. char *gtype; /* generic content type */
  94. char *stype; /* specific content type */
  95. char class;
  96. };
  97. Mtype *mtypes;
  98. /*
  99. * this is a filter that changes mime types and names of
  100. * suspect attachments.
  101. *
  102. */
  103. void
  104. main(int argc, char **argv)
  105. {
  106. ARGBEGIN{
  107. }ARGEND;
  108. Binit(&in, 0, OREAD);
  109. Binit(&out, 1, OWRITE);
  110. init_hdefs();
  111. readmtypes();
  112. /* pass through our standard 'From ' line */
  113. passunixheader();
  114. /* parse with the top level part */
  115. part(nil);
  116. exits(0);
  117. }
  118. /*
  119. * parse a part; returns the ancestor whose boundary terminated
  120. * this part or nil on EOF.
  121. */
  122. static Part*
  123. part(Part *pp)
  124. {
  125. Part *p, *np;
  126. p = mallocz(sizeof *p, 1);
  127. p->pp = pp;
  128. readheader(p);
  129. if(p->boundary != nil){
  130. /* the format of a multipart part is always:
  131. * header
  132. * null or ignored body
  133. * boundary
  134. * header
  135. * body
  136. * boundary
  137. * ...
  138. */
  139. writeheader(p);
  140. np = passbody(p, 1);
  141. if(np != p)
  142. return np;
  143. for(;;){
  144. np = part(p);
  145. if(np != p)
  146. return np;
  147. }
  148. } else {
  149. /* no boundary */
  150. /* may still be multipart if this is a forwarded message */
  151. if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){
  152. /* the format of forwarded message is:
  153. * header
  154. * header
  155. * body
  156. */
  157. writeheader(p);
  158. passnotheader();
  159. return part(p);
  160. } else {
  161. /* This is the meat. This may be an executable.
  162. * if so, wrap it and change its type
  163. */
  164. if(p->badtype || p->badfile){
  165. return problemchild(p);
  166. } else {
  167. writeheader(p);
  168. return passbody(p, 1);
  169. }
  170. }
  171. }
  172. }
  173. /*
  174. * read and parse a complete header
  175. */
  176. static void
  177. readheader(Part *p)
  178. {
  179. Hline *hl, **l;
  180. Hdef *hd;
  181. l = &p->hl;
  182. for(;;){
  183. hl = readhl();
  184. if(hl == nil)
  185. break;
  186. *l = hl;
  187. l = &hl->next;
  188. for(hd = hdefs; hd->type != nil; hd++){
  189. if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){
  190. (*hd->f)(p, hd, s_to_c(hl->s));
  191. break;
  192. }
  193. }
  194. }
  195. }
  196. /*
  197. * read a possibly multiline header line
  198. */
  199. static Hline*
  200. readhl(void)
  201. {
  202. Hline *hl;
  203. String *s;
  204. char *p;
  205. int n;
  206. p = Brdline(&in, '\n');
  207. if(p == nil)
  208. return nil;
  209. n = Blinelen(&in);
  210. if(memchr(p, ':', n) == nil){
  211. Bseek(&in, -n, 1);
  212. return nil;
  213. }
  214. s = s_nappend(s_new(), p, n);
  215. for(;;){
  216. p = Brdline(&in, '\n');
  217. if(p == nil)
  218. break;
  219. n = Blinelen(&in);
  220. if(*p != ' ' && *p != '\t'){
  221. Bseek(&in, -n, 1);
  222. break;
  223. }
  224. s = s_nappend(s, p, n);
  225. }
  226. hl = malloc(sizeof *hl);
  227. hl->s = s;
  228. hl->next = nil;
  229. return hl;
  230. }
  231. /*
  232. * write out a complete header
  233. */
  234. static void
  235. writeheader(Part *p)
  236. {
  237. Hline *hl, *next;
  238. for(hl = p->hl; hl != nil; hl = next){
  239. Bprint(&out, "%s", s_to_c(hl->s));
  240. s_free(hl->s);
  241. next = hl->next;
  242. free(hl);
  243. }
  244. p->hl = nil;
  245. }
  246. /*
  247. * pass a body through. return if we hit one of our ancestors'
  248. * boundaries or EOF. if we hit a boundary, return a pointer to
  249. * that ancestor. if we hit EOF, return nil.
  250. */
  251. static Part*
  252. passbody(Part *p, int dobound)
  253. {
  254. Part *pp;
  255. char *cp;
  256. for(;;){
  257. cp = Brdline(&in, '\n');
  258. if(cp == nil)
  259. return nil;
  260. for(pp = p; pp != nil; pp = pp->pp)
  261. if(pp->boundary != nil
  262. && strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){
  263. if(dobound)
  264. Bwrite(&out, cp, Blinelen(&in));
  265. else
  266. Bseek(&in, -Blinelen(&in), 1);
  267. return pp;
  268. }
  269. Bwrite(&out, cp, Blinelen(&in));
  270. }
  271. return nil;
  272. }
  273. /*
  274. * emit a multipart Part that explains the problem
  275. */
  276. static Part*
  277. problemchild(Part *p)
  278. {
  279. Part *np;
  280. Hline *hl;
  281. String *boundary;
  282. char *cp;
  283. syslog(0, "mail", "vf %s %s", p->type?s_to_c(p->type):"?",
  284. p->filename?s_to_c(p->filename):"?");
  285. boundary = mkboundary();
  286. /* print out non-mime headers */
  287. for(hl = p->hl; hl != nil; hl = hl->next)
  288. if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0)
  289. Bprint(&out, "%s", s_to_c(hl->s));
  290. /* add in out own multipart headers and message */
  291. Bprint(&out, "Content-Type: multipart/mixed;\n");
  292. Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary));
  293. Bprint(&out, "Content-Disposition: inline\n");
  294. Bprint(&out, "\n");
  295. Bprint(&out, "This is a multi-part message in MIME format.\n");
  296. Bprint(&out, "--%s\n", s_to_c(boundary));
  297. Bprint(&out, "Content-Disposition: inline\n");
  298. Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n");
  299. Bprint(&out, "Content-Transfer-Encoding: 7bit\n");
  300. Bprint(&out, "\n");
  301. Bprint(&out, "The following attachment had content that we can't\n");
  302. Bprint(&out, "prove to be harmless. To avoid possible automatic\n");
  303. Bprint(&out, "execution, we changed the content headers.\n");
  304. Bprint(&out, "The original header was:\n\n");
  305. /* print out original header lines */
  306. for(hl = p->hl; hl != nil; hl = hl->next)
  307. if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0)
  308. Bprint(&out, "\t%s", s_to_c(hl->s));
  309. Bprint(&out, "--%s\n", s_to_c(boundary));
  310. /* change file name */
  311. if(p->filename)
  312. s_append(p->filename, ".suspect");
  313. else
  314. p->filename = s_copy("file.suspect");
  315. /* print out new header */
  316. Bprint(&out, "Content-Type: application/octet-stream\n");
  317. Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename));
  318. switch(p->encoding){
  319. case Enone:
  320. break;
  321. case Ebase64:
  322. Bprint(&out, "Content-Transfer-Encoding: base64\n");
  323. break;
  324. case Equoted:
  325. Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n");
  326. break;
  327. }
  328. /* pass the body */
  329. np = passbody(p, 0);
  330. /* add the new boundary and the original terminator */
  331. Bprint(&out, "--%s--\n", s_to_c(boundary));
  332. if(np && np->boundary){
  333. cp = Brdline(&in, '\n');
  334. Bwrite(&out, cp, Blinelen(&in));
  335. }
  336. return np;
  337. }
  338. static int
  339. isattribute(char **pp, char *attr)
  340. {
  341. char *p;
  342. int n;
  343. n = strlen(attr);
  344. p = *pp;
  345. if(cistrncmp(p, attr, n) != 0)
  346. return 0;
  347. p += n;
  348. while(*p == ' ')
  349. p++;
  350. if(*p++ != '=')
  351. return 0;
  352. while(*p == ' ')
  353. p++;
  354. *pp = p;
  355. return 1;
  356. }
  357. /*
  358. * parse content type header
  359. */
  360. static void
  361. ctype(Part *p, Hdef *h, char *cp)
  362. {
  363. String *s;
  364. cp += h->len;
  365. cp = skipwhite(cp);
  366. p->type = s_new();
  367. cp = getstring(cp, p->type, 1);
  368. if(badtype(s_to_c(p->type)))
  369. p->badtype = 1;
  370. while(*cp){
  371. if(isattribute(&cp, "boundary")){
  372. s = s_new();
  373. cp = getstring(cp, s, 0);
  374. p->boundary = s_reset(p->boundary);
  375. s_append(p->boundary, "--");
  376. s_append(p->boundary, s_to_c(s));
  377. p->blen = s_len(p->boundary);
  378. s_free(s);
  379. } else if(cistrncmp(cp, "multipart", 9) == 0){
  380. /*
  381. * the first unbounded part of a multipart message,
  382. * the preamble, is not displayed or saved
  383. */
  384. } else if(isattribute(&cp, "name")){
  385. setfilename(p, cp);
  386. } else if(isattribute(&cp, "charset")){
  387. if(p->charset == nil)
  388. p->charset = s_new();
  389. cp = getstring(cp, s_reset(p->charset), 0);
  390. }
  391. cp = skiptosemi(cp);
  392. }
  393. }
  394. /*
  395. * parse content encoding header
  396. */
  397. static void
  398. cencoding(Part *m, Hdef *h, char *p)
  399. {
  400. p += h->len;
  401. p = skipwhite(p);
  402. if(cistrncmp(p, "base64", 6) == 0)
  403. m->encoding = Ebase64;
  404. else if(cistrncmp(p, "quoted-printable", 16) == 0)
  405. m->encoding = Equoted;
  406. }
  407. /*
  408. * parse content disposition header
  409. */
  410. static void
  411. cdisposition(Part *p, Hdef *h, char *cp)
  412. {
  413. cp += h->len;
  414. cp = skipwhite(cp);
  415. while(*cp){
  416. if(cistrncmp(cp, "inline", 6) == 0){
  417. p->disposition = Dinline;
  418. } else if(cistrncmp(cp, "attachment", 10) == 0){
  419. p->disposition = Dfile;
  420. } else if(cistrncmp(cp, "filename=", 9) == 0){
  421. cp += 9;
  422. setfilename(p, cp);
  423. }
  424. cp = skiptosemi(cp);
  425. }
  426. }
  427. static void
  428. setfilename(Part *p, char *name)
  429. {
  430. if(p->filename == nil)
  431. p->filename = s_new();
  432. getstring(name, s_reset(p->filename), 0);
  433. p->filename = tokenconvert(p->filename);
  434. if(badfile(s_to_c(p->filename)))
  435. p->badfile = 1;
  436. }
  437. static char*
  438. skipwhite(char *p)
  439. {
  440. while(isspace(*p))
  441. p++;
  442. return p;
  443. }
  444. static char*
  445. skiptosemi(char *p)
  446. {
  447. while(*p && *p != ';')
  448. p++;
  449. while(*p == ';' || isspace(*p))
  450. p++;
  451. return p;
  452. }
  453. /*
  454. * parse a possibly "'d string from a header. A
  455. * ';' terminates the string.
  456. */
  457. static char*
  458. getstring(char *p, String *s, int dolower)
  459. {
  460. s = s_reset(s);
  461. p = skipwhite(p);
  462. if(*p == '"'){
  463. p++;
  464. for(;*p && *p != '"'; p++)
  465. if(dolower)
  466. s_putc(s, tolower(*p));
  467. else
  468. s_putc(s, *p);
  469. if(*p == '"')
  470. p++;
  471. s_terminate(s);
  472. return p;
  473. }
  474. for(; *p && !isspace(*p) && *p != ';'; p++)
  475. if(dolower)
  476. s_putc(s, tolower(*p));
  477. else
  478. s_putc(s, *p);
  479. s_terminate(s);
  480. return p;
  481. }
  482. static void
  483. init_hdefs(void)
  484. {
  485. Hdef *hd;
  486. static int already;
  487. if(already)
  488. return;
  489. already = 1;
  490. for(hd = hdefs; hd->type != nil; hd++)
  491. hd->len = strlen(hd->type);
  492. }
  493. /*
  494. * create a new boundary
  495. */
  496. static String*
  497. mkboundary(void)
  498. {
  499. char buf[32];
  500. int i;
  501. static int already;
  502. if(already == 0){
  503. srand((time(0)<<16)|getpid());
  504. already = 1;
  505. }
  506. strcpy(buf, "upas-");
  507. for(i = 5; i < sizeof(buf)-1; i++)
  508. buf[i] = 'a' + nrand(26);
  509. buf[i] = 0;
  510. return s_copy(buf);
  511. }
  512. /*
  513. * skip blank lines till header
  514. */
  515. static void
  516. passnotheader(void)
  517. {
  518. char *cp;
  519. int i, n;
  520. while((cp = Brdline(&in, '\n')) != nil){
  521. n = Blinelen(&in);
  522. for(i = 0; i < n-1; i++)
  523. if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){
  524. Bseek(&in, -n, 1);
  525. return;
  526. }
  527. Bwrite(&out, cp, n);
  528. }
  529. }
  530. /*
  531. * pass unix header lines
  532. */
  533. static void
  534. passunixheader(void)
  535. {
  536. char *p;
  537. int n;
  538. while((p = Brdline(&in, '\n')) != nil){
  539. n = Blinelen(&in);
  540. if(strncmp(p, "From ", 5) != 0){
  541. Bseek(&in, -n, 1);
  542. break;
  543. }
  544. Bwrite(&out, p, n);
  545. }
  546. }
  547. /*
  548. * Read mime types
  549. */
  550. static void
  551. readmtypes(void)
  552. {
  553. Biobuf *b;
  554. char *p;
  555. char *f[6];
  556. Mtype *m;
  557. Mtype **l;
  558. b = Bopen("/sys/lib/mimetype", OREAD);
  559. if(b == nil)
  560. return;
  561. l = &mtypes;
  562. while((p = Brdline(b, '\n')) != nil){
  563. if(*p == '#')
  564. continue;
  565. p[Blinelen(b)-1] = 0;
  566. if(tokenize(p, f, nelem(f)) < 5)
  567. continue;
  568. m = mallocz(sizeof *m, 1);
  569. if(m == nil)
  570. goto err;
  571. m->ext = strdup(f[0]);
  572. if(m->ext == 0)
  573. goto err;
  574. m->gtype = strdup(f[1]);
  575. if(m->gtype == 0)
  576. goto err;
  577. m->stype = strdup(f[2]);
  578. if(m->stype == 0)
  579. goto err;
  580. m->class = *f[4];
  581. *l = m;
  582. l = &(m->next);
  583. }
  584. Bterm(b);
  585. return;
  586. err:
  587. if(m == nil)
  588. return;
  589. free(m->ext);
  590. free(m->gtype);
  591. free(m->stype);
  592. free(m);
  593. Bterm(b);
  594. }
  595. /*
  596. * if the class is 'm' or 'y', accept it
  597. * if the class is 'p' check a previous extension
  598. * otherwise, filename is bad
  599. */
  600. static int
  601. badfile(char *name)
  602. {
  603. char *p;
  604. Mtype *m;
  605. int rv;
  606. p = strrchr(name, '.');
  607. if(p == nil)
  608. return 0;
  609. for(m = mtypes; m != nil; m = m->next)
  610. if(cistrcmp(p, m->ext) == 0){
  611. switch(m->class){
  612. case 'm':
  613. case 'y':
  614. return 0;
  615. case 'p':
  616. *p = 0;
  617. rv = badfile(name);
  618. *p = '.';
  619. return rv;
  620. }
  621. }
  622. return 1;
  623. }
  624. /*
  625. * if the class is 'm' or 'y' or 'p', accept it
  626. * otherwise, filename is bad
  627. */
  628. static int
  629. badtype(char *type)
  630. {
  631. Mtype *m;
  632. char *s, *fix;
  633. int rv = 1;
  634. fix = s = strchr(type, '/');
  635. if(s != nil)
  636. *s++ = 0;
  637. else
  638. s = "-";
  639. for(m = mtypes; m != nil; m = m->next){
  640. if(cistrcmp(type, m->gtype) != 0)
  641. continue;
  642. if(cistrcmp(s, m->stype) != 0)
  643. continue;
  644. switch(m->class){
  645. case 'y':
  646. case 'p':
  647. case 'm':
  648. rv = 0;
  649. break;
  650. }
  651. break;
  652. }
  653. if(fix != nil)
  654. *fix = '/';
  655. return rv;
  656. }
  657. /* rfc2047 non-ascii */
  658. typedef struct Charset Charset;
  659. struct Charset {
  660. char *name;
  661. int len;
  662. int convert;
  663. } charsets[] =
  664. {
  665. { "us-ascii", 8, 1, },
  666. { "utf-8", 5, 0, },
  667. { "iso-8859-1", 10, 1, },
  668. };
  669. /*
  670. * convert to UTF if need be
  671. */
  672. static String*
  673. tokenconvert(String *t)
  674. {
  675. String *s;
  676. char decoded[1024];
  677. char utfbuf[2*1024];
  678. int i, len;
  679. char *e;
  680. char *token;
  681. token = s_to_c(t);
  682. len = s_len(t);
  683. if(token[0] != '=' || token[1] != '?' ||
  684. token[len-2] != '?' || token[len-1] != '=')
  685. goto err;
  686. e = token+len-2;
  687. token += 2;
  688. // bail if we don't understand the character set
  689. for(i = 0; i < nelem(charsets); i++)
  690. if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0)
  691. if(token[charsets[i].len] == '?'){
  692. token += charsets[i].len + 1;
  693. break;
  694. }
  695. if(i >= nelem(charsets))
  696. goto err;
  697. // bail if it doesn't fit
  698. if(strlen(token) > sizeof(decoded)-1)
  699. goto err;
  700. // bail if we don't understand the encoding
  701. if(cistrncmp(token, "b?", 2) == 0){
  702. token += 2;
  703. len = dec64((uchar*)decoded, sizeof(decoded), token, e-token);
  704. decoded[len] = 0;
  705. } else if(cistrncmp(token, "q?", 2) == 0){
  706. token += 2;
  707. len = decquoted(decoded, token, e);
  708. if(len > 0 && decoded[len-1] == '\n')
  709. len--;
  710. decoded[len] = 0;
  711. } else
  712. goto err;
  713. s = nil;
  714. switch(charsets[i].convert){
  715. case 0:
  716. s = s_copy(decoded);
  717. break;
  718. case 1:
  719. s = s_new();
  720. latin1toutf(utfbuf, decoded, decoded+len);
  721. s_append(s, utfbuf);
  722. break;
  723. }
  724. return s;
  725. err:
  726. return s_clone(t);
  727. }
  728. /*
  729. * decode quoted
  730. */
  731. enum
  732. {
  733. Self= 1,
  734. Hex= 2,
  735. };
  736. uchar tableqp[256];
  737. static void
  738. initquoted(void)
  739. {
  740. int c;
  741. memset(tableqp, 0, 256);
  742. for(c = ' '; c <= '<'; c++)
  743. tableqp[c] = Self;
  744. for(c = '>'; c <= '~'; c++)
  745. tableqp[c] = Self;
  746. tableqp['\t'] = Self;
  747. tableqp['='] = Hex;
  748. }
  749. static int
  750. hex2int(int x)
  751. {
  752. if(x >= '0' && x <= '9')
  753. return x - '0';
  754. if(x >= 'A' && x <= 'F')
  755. return (x - 'A') + 10;
  756. if(x >= 'a' && x <= 'f')
  757. return (x - 'a') + 10;
  758. return 0;
  759. }
  760. static char*
  761. decquotedline(char *out, char *in, char *e)
  762. {
  763. int c, soft;
  764. /* dump trailing white space */
  765. while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
  766. e--;
  767. /* trailing '=' means no newline */
  768. if(*e == '='){
  769. soft = 1;
  770. e--;
  771. } else
  772. soft = 0;
  773. while(in <= e){
  774. c = (*in++) & 0xff;
  775. switch(tableqp[c]){
  776. case Self:
  777. *out++ = c;
  778. break;
  779. case Hex:
  780. c = hex2int(*in++)<<4;
  781. c |= hex2int(*in++);
  782. *out++ = c;
  783. break;
  784. }
  785. }
  786. if(!soft)
  787. *out++ = '\n';
  788. *out = 0;
  789. return out;
  790. }
  791. static int
  792. decquoted(char *out, char *in, char *e)
  793. {
  794. char *p, *nl;
  795. if(tableqp[' '] == 0)
  796. initquoted();
  797. p = out;
  798. while((nl = strchr(in, '\n')) != nil && nl < e){
  799. p = decquotedline(p, in, nl);
  800. in = nl + 1;
  801. }
  802. if(in < e)
  803. p = decquotedline(p, in, e-1);
  804. // make sure we end with a new line
  805. if(*(p-1) != '\n'){
  806. *p++ = '\n';
  807. *p = 0;
  808. }
  809. return p - out;
  810. }
  811. /* translate latin1 directly since it fits neatly in utf */
  812. static int
  813. latin1toutf(char *out, char *in, char *e)
  814. {
  815. Rune r;
  816. char *p;
  817. p = out;
  818. for(; in < e; in++){
  819. r = (*in) & 0xff;
  820. p += runetochar(p, &r);
  821. }
  822. *p = 0;
  823. return p - out;
  824. }