reformime.c 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. /* vi: set sw=4 ts=4: */
  2. /*
  3. * reformime: parse MIME-encoded message
  4. *
  5. * Copyright (C) 2008 by Vladimir Dronnikov <dronnikov@gmail.com>
  6. *
  7. * Licensed under GPLv2, see file LICENSE in this source tree.
  8. */
  9. //config:config REFORMIME
  10. //config: bool "reformime (7.5 kb)"
  11. //config: default y
  12. //config: help
  13. //config: Parse MIME-formatted messages.
  14. //config:
  15. //config:config FEATURE_REFORMIME_COMPAT
  16. //config: bool "Accept and ignore options other than -x and -X"
  17. //config: default y
  18. //config: depends on REFORMIME
  19. //config: help
  20. //config: Accept (for compatibility only) and ignore options
  21. //config: other than -x and -X.
  22. //applet:IF_REFORMIME(APPLET(reformime, BB_DIR_BIN, BB_SUID_DROP))
  23. //kbuild:lib-$(CONFIG_REFORMIME) += reformime.o mail.o
  24. #include "libbb.h"
  25. #include "mail.h"
  /*
   * Debug tracing helper. With the condition below left at 0 every
   * dbg_error_msg() call expands to a no-op expression and its arguments
   * are never evaluated; change it to 1 to forward them to bb_error_msg().
   */
  #if 0
  # define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
  #else
  # define dbg_error_msg(...) ((void)0)
  #endif
  /*
   * Look up KEY (case-insensitively) in a NULL-terminated array of tokens
   * and return the token stored right after the first match.
   *
   * string_array: NULL-terminated token list
   * key:          token to search for
   * defvalue:     returned when KEY is absent, or when KEY is the very last
   *               token so that nothing follows it
   *
   * The returned pointer is either an element of string_array or defvalue;
   * nothing is allocated.
   */
  static const char *find_token(const char *const string_array[], const char *key, const char *defvalue)
  {
      int i;

      for (i = 0; string_array[i] != NULL; i++) {
          if (strcasecmp(string_array[i], key) == 0) {
              /* The slot after a match is at worst the terminating NULL */
              const char *value = string_array[i + 1];

              return value ? value : defvalue;
          }
      }
      return defvalue;
  }
  /*
   * Like find_token(), but the token is mandatory: if KEY has no value in
   * string_array the applet terminates via bb_error_msg_and_die().
   */
  static const char *xfind_token(const char *const string_array[], const char *key)
  {
      const char *value = find_token(string_array, key, NULL);

      if (!value)
          bb_error_msg_and_die("not found: '%s'", key);
      return value;
  }
  /*
   * Option bits. Their order mirrors the order of the option letters in the
   * getopt32() string used by reformime_main(). Only -x and -X are acted
   * upon; the remaining bits exist so the compat options can be accepted.
   */
  enum {
      OPT_x = 0x001,  /* -x: extract MIME section to a file */
      OPT_X = 0x002,  /* -X: pipe MIME section to a program */
  #if ENABLE_FEATURE_REFORMIME_COMPAT
      OPT_d = 0x004,  /* -d: parse a delivery status notification */
      OPT_e = 0x008,  /* -e: extract contents of MIME section */
      OPT_i = 0x010,  /* -i: show MIME info */
      OPT_s = 0x020,  /* -s: specify MIME section */
      OPT_r = 0x040,  /* -r: rewrite message */
      OPT_c = 0x080,  /* -c: default charset */
      OPT_m = 0x100,  /* -m: create a MIME message digest */
      OPT_h = 0x200,  /* -h: decode RFC 2047-encoded header */
      OPT_o = 0x400,  /* -o: encode unstructured header */
      OPT_O = 0x800,  /* -O: encode address list header */
  #endif
  };
  66. static int parse(const char *boundary, char **argv)
  67. {
  68. int boundary_len = strlen(boundary);
  69. char uniq[sizeof("%%llu.%u") + sizeof(int)*3];
  70. dbg_error_msg("BOUNDARY[%s]", boundary);
  71. // prepare unique string pattern
  72. sprintf(uniq, "%%llu.%u", (unsigned)getpid());
  73. dbg_error_msg("UNIQ[%s]", uniq);
  74. while (1) {
  75. char *header;
  76. const char *tokens[32]; /* 32 is enough */
  77. const char *type;
  78. /* Read the header (everything up to two \n) */
  79. {
  80. unsigned header_idx = 0;
  81. int last_ch = 0;
  82. header = NULL;
  83. while (1) {
  84. int ch = fgetc(stdin);
  85. if (ch == '\r') /* Support both line endings */
  86. continue;
  87. if (ch == EOF)
  88. break;
  89. if (ch == '\n' && last_ch == ch)
  90. break;
  91. if (!(header_idx & 0xff))
  92. header = xrealloc(header, header_idx + 0x101);
  93. header[header_idx++] = last_ch = ch;
  94. }
  95. if (!header) {
  96. dbg_error_msg("EOF");
  97. break;
  98. }
  99. header[header_idx] = '\0';
  100. dbg_error_msg("H:'%s'", p);
  101. }
  102. /* Split to tokens */
  103. {
  104. char *s, *p;
  105. char *tokstate;
  106. unsigned ntokens;
  107. const char *delims = ";=\" \t\n";
  108. /* Skip to last Content-Type: */
  109. s = p = header;
  110. while ((p = strchr(p, '\n')) != NULL) {
  111. p++;
  112. if (strncasecmp(p, "Content-Type:", sizeof("Content-Type:")-1) == 0)
  113. s = p;
  114. }
  115. dbg_error_msg("L:'%s'", p);
  116. ntokens = 0;
  117. s = strtok_r(s, delims, &tokstate);
  118. while (s) {
  119. tokens[ntokens] = s;
  120. if (ntokens < ARRAY_SIZE(tokens) - 1)
  121. ntokens++;
  122. dbg_error_msg("L[%d]='%s'", ntokens, s);
  123. s = strtok_r(NULL, delims, &tokstate);
  124. }
  125. tokens[ntokens] = NULL;
  126. dbg_error_msg("EMPTYLINE, ntokens:%d", ntokens);
  127. if (ntokens == 0)
  128. break;
  129. }
  130. /* Is it multipart? */
  131. type = find_token(tokens, "Content-Type:", "text/plain");
  132. dbg_error_msg("TYPE:'%s'", type);
  133. if (0 == strncasecmp(type, "multipart/", 10)) {
  134. /* Yes, recurse */
  135. if (strcasecmp(type + 10, "mixed") != 0)
  136. bb_error_msg_and_die("no support of content type '%s'", type);
  137. parse(xfind_token(tokens, "boundary"), argv);
  138. } else {
  139. /* No, process one non-multipart section */
  140. char *end;
  141. pid_t pid = pid;
  142. FILE *fp;
  143. const char *charset = find_token(tokens, "charset", CONFIG_FEATURE_MIME_CHARSET);
  144. const char *encoding = find_token(tokens, "Content-Transfer-Encoding:", "7bit");
  145. /* Compose target filename */
  146. char *filename = (char *)find_token(tokens, "filename", NULL);
  147. if (!filename)
  148. filename = xasprintf(uniq, monotonic_us());
  149. else
  150. filename = bb_get_last_path_component_strip(xstrdup(filename));
  151. if (option_mask32 & OPT_X) {
  152. int fd[2];
  153. /* start external helper */
  154. xpipe(fd);
  155. pid = vfork();
  156. if (0 == pid) {
  157. /* child reads from fd[0] */
  158. close(fd[1]);
  159. xmove_fd(fd[0], STDIN_FILENO);
  160. xsetenv("CONTENT_TYPE", type);
  161. xsetenv("CHARSET", charset);
  162. xsetenv("ENCODING", encoding);
  163. xsetenv("FILENAME", filename);
  164. BB_EXECVP_or_die(argv);
  165. }
  166. /* parent will write to fd[1] */
  167. close(fd[0]);
  168. fp = xfdopen_for_write(fd[1]);
  169. signal(SIGPIPE, SIG_IGN);
  170. } else {
  171. /* write to file */
  172. char *fname = xasprintf("%s%s", *argv, filename);
  173. fp = xfopen_for_write(fname);
  174. free(fname);
  175. }
  176. free(filename);
  177. /* write to fp */
  178. end = NULL;
  179. if (0 == strcasecmp(encoding, "base64")) {
  180. read_base64(stdin, fp, '-');
  181. } else
  182. if (0 != strcasecmp(encoding, "7bit")
  183. && 0 != strcasecmp(encoding, "8bit")
  184. ) {
  185. /* quoted-printable, binary, user-defined are unsupported so far */
  186. bb_error_msg_and_die("encoding '%s' not supported", encoding);
  187. } else {
  188. /* plain 7bit or 8bit */
  189. while ((end = xmalloc_fgets(stdin)) != NULL) {
  190. if ('-' == end[0]
  191. && '-' == end[1]
  192. && strncmp(end + 2, boundary, boundary_len) == 0
  193. ) {
  194. break;
  195. }
  196. fputs(end, fp);
  197. }
  198. }
  199. fclose(fp);
  200. /* Wait for child */
  201. if (option_mask32 & OPT_X) {
  202. int rc;
  203. signal(SIGPIPE, SIG_DFL);
  204. rc = (wait4pid(pid) & 0xff);
  205. if (rc != 0)
  206. return rc + 20;
  207. }
  208. /* Multipart ended? */
  209. if (end && '-' == end[2 + boundary_len] && '-' == end[2 + boundary_len + 1]) {
  210. dbg_error_msg("FINISHED MPART:'%s'", end);
  211. break;
  212. }
  213. dbg_error_msg("FINISHED:'%s'", end);
  214. free(end);
  215. } /* end of "handle one non-multipart block" */
  216. free(header);
  217. } /* while (1) */
  218. dbg_error_msg("ENDPARSE[%s]", boundary);
  219. return EXIT_SUCCESS;
  220. }
  221. //usage:#define reformime_trivial_usage
  222. //usage: "[OPTIONS]"
  223. //usage:#define reformime_full_usage "\n\n"
  224. //usage: "Parse MIME-encoded message on stdin\n"
  225. //usage: "\n -x PREFIX Extract content of MIME sections to files"
  226. //usage: "\n -X PROG ARGS Filter content of MIME sections through PROG"
  227. //usage: "\n Must be the last option"
  228. //usage: "\n"
  229. //usage: "\nOther options are silently ignored"
  230. /*
  231. Usage: reformime [options]
  232. -d - parse a delivery status notification.
  233. -e - extract contents of MIME section.
  234. -x - extract MIME section to a file.
  235. -X - pipe MIME section to a program.
  236. -i - show MIME info.
  237. -s n.n.n.n - specify MIME section.
  238. -r - rewrite message, filling in missing MIME headers.
  239. -r7 - also convert 8bit/raw encoding to quoted-printable, if possible.
  240. -r8 - also convert quoted-printable encoding to 8bit, if possible.
  241. -c charset - default charset for rewriting, -o, and -O.
  242. -m [file] [file]... - create a MIME message digest.
  243. -h "header" - decode RFC 2047-encoded header.
  244. -o "header" - encode unstructured header using RFC 2047.
  245. -O "header" - encode address list header using RFC 2047.
  246. */
  247. int reformime_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
  248. int reformime_main(int argc UNUSED_PARAM, char **argv)
  249. {
  250. unsigned opts;
  251. const char *opt_prefix = "";
  252. INIT_G();
  253. // parse options
  254. // N.B. only -x and -X are supported so far
  255. opts = getopt32(argv, "^"
  256. "x:X" IF_FEATURE_REFORMIME_COMPAT("deis:r:c:m:*h:o:O:")
  257. "\0" "x--X:X--x",
  258. &opt_prefix
  259. IF_FEATURE_REFORMIME_COMPAT(, NULL, NULL, &G.opt_charset, NULL, NULL, NULL, NULL)
  260. );
  261. argv += optind;
  262. return parse("", (opts & OPT_X) ? argv : (char **)&opt_prefix);
  263. }