uuencode.c 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. /* vi: set sw=4 ts=4: */
  2. /*
  3. * Copyright 2003, Glenn McGrath
  4. * Copyright 2006, Rob Landley <rob@landley.net>
  5. * Copyright 2010, Denys Vlasenko
  6. *
  7. * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  8. */
  9. #include "libbb.h"
  10. /* Conversion tables */
  11. #if ENABLE_BASE32
  /* Base32 alphabet: the element at index N is the symbol for 5-bit value N. */
  const char bb_uuenc_tbl_base32[] ALIGN1 = {
      /*  0..25: uppercase letters */
      'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
      'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
      /* 26..31: digits '2' through '7' */
      '2', '3', '4', '5', '6', '7',
      /* the '=' padding symbol is deliberately not stored here */
  };
  19. #endif
  /*
   * Base64 alphabet: the element at index N is the symbol for 6-bit value N.
   * One extra element (index 64) holds the '=' padding symbol.
   */
  const char bb_uuenc_tbl_base64[] ALIGN1 = {
      /*  0..25: uppercase letters */
      'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
      'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
      /* 26..51: lowercase letters */
      'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
      'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
      /* 52..61: decimal digits */
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
      /* 62, 63: the two symbol characters */
      '+', '/',
      /* 64: termination (padding) character */
      '='
  };
  /*
   * uuencode alphabet: value 0 maps to '`', values 1..63 map to the
   * consecutive characters '!' through '_'. One extra element (index 64)
   * holds the '`' termination character.
   */
  const char bb_uuenc_tbl_std[] ALIGN1 = {
      /*  0..15 */
      '`', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
      /* 16..31 */
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
      /* 32..47 */
      '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
      /* 48..63 */
      'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
      /* 64: termination character */
      '`'
  };
  /*
   * Encode LENGTH bytes at SRC into 4-characters-per-3-bytes text using the
   * 65-entry alphabet TBL, and store the result at P.
   *
   * p      - output buffer; receives the encoded text followed by a NUL.
   *          Must be writable for at least 1 + 4 * ((length + 2) / 3) bytes.
   * src    - input bytes.
   * length - number of input bytes. Zero or a negative value produces an
   *          empty string.
   * tbl    - alphabet: tbl[0..63] are the symbols for 6-bit values,
   *          tbl[64] is the padding character used to fill the last group.
   */
  void FAST_FUNC bb_uuencode(char *p, const void *src, int length, const char *tbl)
  {
      const unsigned char *s = src;

      /*
       * Nothing to encode. A negative length must be caught here: it would
       * skip the encode loop and then make the padding loop below walk
       * backwards, storing pad characters in front of the output buffer.
       */
      if (length <= 0) {
          *p = '\0';
          return;
      }

      /* Transform each group of 3x8 bits into 4x6 bits */
      while (length > 0) {
          /* s[1] and s[2] are read only if they exist, else taken as 0 */
          unsigned s1 = 0;
          unsigned s2 = 0;

          length -= 3; /* now >= 0, or -1 / -2 for a short last group */
          if (length >= -1) {
              s1 = s[1];
              if (length >= 0)
                  s2 = s[2];
          }
          *p++ = tbl[s[0] >> 2];
          *p++ = tbl[((s[0] & 3) << 4) + (s1 >> 4)];
          *p++ = tbl[((s1 & 0xf) << 2) + (s2 >> 6)];
          *p++ = tbl[s2 & 0x3f];
          s += 3;
      }

      /* Zero-terminate */
      *p = '\0';

      /* length is 0, -1 or -2 here: overwrite the last one or two symbols
       * of a short final group with the padding character */
      for (; length < 0; length++)
          *--p = tbl[64];
  }
  /*
   * Decode a base64 encoded string.
   *
   * dst    - where decoded bytes are stored.
   * pp_src - address of the input pointer. May be NULL (httpd.c relies on
   *          this): the input is then read from dst itself and no position
   *          is reported back.
   *
   * Returns a pointer just past the last byte written; the output is NOT
   * NUL-terminated.
   * On return *pp_src has been advanced past the consumed input. If it
   * points to '\0', the source was fully decoded. Otherwise it points to
   * the start of an incomplete trailing group, e.g. for "MQ" or "MQ="
   * (the correct encoding of "1" is "MQ==").
   * Characters that are not part of the alphabet are skipped.
   */
  char* FAST_FUNC decode_base64(char *dst, const char **pp_src)
  {
      /* Marker placed above the 24 data bits when a '=' was accepted */
      enum { PAD_FLAG = 0x1000000 };

      const char *in = pp_src ? *pp_src : dst;
      unsigned acc = 0;   /* bits collected for the current group */
      int filled = 0;     /* symbols collected for the current group, 0..3 */

      for (;;) {
          unsigned c = (unsigned char)*in;
          unsigned v;

          if (c == '\0')
              break;
          in++;

          /* A chain of range tests is faster than strchr() on the table */
          if (c >= 'A' && c <= 'Z')
              v = c - 'A';
          else if (c >= 'a' && c <= 'z')
              v = c - 'a' + 26;
          else if (c >= '0' && c <= '9')
              v = c - '0' + 52;
          else if (c == '+')
              v = 62;
          else if (c == '/')
              v = 63;
          else if (c == '=' && (filled == 3 || (filled == 2 && *in == '=')))
              /* '=' is accepted only as "xxx=" or "xx==";
               * this rejects "==AA", "A===", "AA=A" etc */
              v = PAD_FLAG;
          else
              //TODO: add BASE64_FLAG_foo to die on bad char?
              continue;

          acc = (acc << 6) | v;
          filled = (filled + 1) & 3;
          if (filled != 0)
              continue;

          /* Full group: always store three bytes, then decide how many count */
          dst[0] = (char) (acc >> 16);
          dst[1] = (char) (acc >> 8);
          dst[2] = (char) acc;
          if (acc & PAD_FLAG) {
              /* last symbol was '=': keep 2 bytes, or 1 if it was "==" */
              dst += (acc & (PAD_FLAG << 6)) ? 1 : 2;
              break;
          }
          dst += 3;
          acc = 0;
      }

      /* filled is zero here if a full 4-symbol group was decoded last;
       * stepping back by it signals truncation to the caller */
      if (pp_src)
          *pp_src = in - filled;
      return dst;
  }
  130. #if ENABLE_BASE32
  /*
   * Decode a base32 encoded string.
   *
   * dst    - where decoded bytes are stored (may be the input buffer itself).
   * pp_src - address of the input pointer; must not be NULL.
   *
   * Returns a pointer just past the last byte written; the output is not
   * NUL-terminated here. On return *pp_src points past the consumed input,
   * or at the start of an incomplete trailing group.
   * Letters are accepted in either case; other characters are skipped.
   */
  char* FAST_FUNC decode_base32(char *dst, const char **pp_src)
  {
      const char *in = *pp_src;
      uint64_t acc = 0;   /* never reset: older groups just shift out of view */
      int filled = 0;     /* symbols collected for the current group, 0..7 */

      for (;;) {
          unsigned c = (unsigned char)*in;

          if (c == '\0')
              break;
          in++;

          /* A chain of tests is faster than strchr() on the table */
          if (c == '=' && filled > 1) {
              /* padding counts as zero bits; not allowed as 1st/2nd symbol */
              c = 0;
          } else if (c >= '2' && c <= '7') {
              c = c - '2' + 26;
          } else {
              /* fold to lowercase; anything outside a..z wraps above 25 */
              c = (c | 0x20) - 'a';
              if (c > 25)
                  //TODO: add BASE64_FLAG_foo to die on bad char?
                  continue;
          }

          acc = (acc << 5) | c;
          filled = (filled + 1) & 7;
          if (filled != 0)
              continue;

          /* Full 8-symbol group: the first byte is always valid */
          *dst++ = (char) (acc >> 32);
          if (in[-1] != '=') {
              *dst++ = (char) (acc >> 24);
              *dst++ = (char) (acc >> 16);
              *dst++ = (char) (acc >> 8);
              *dst++ = (char) acc;
              continue;
          }

          /*
           * Padded final group. Count the trailing '=' BEFORE storing the
           * remaining bytes: when decoding in-place those stores can land
           * on the padding itself. Testcase:
           * echo ' 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18' | base32 | base32 -d
           * IOW, decoding of
           * EAYSAMRAGMQDIIBVEA3CANZAHAQDSIBRGAQDCMJAGEZCAMJTEAYTIIBRGUQDCNRAGE3SAMJYBI==
           * ====
           * must stitch the tail together correctly, else the testcase
           * fails and prints trailing extra NUL bytes.
           */
          {
              const char *scan = in;
              int pads = 0;

              while (*--scan == '=')
                  pads++;
              dst[0] = (char) (acc >> 24);
              dst[1] = (char) (acc >> 16);
              dst[2] = (char) (acc >> 8);
              dst[3] = (char) acc;
              /* keep 4 bytes minus the last 1, 2, 3 or 4 that are padding */
              dst += 4 - (pads + 1) * 2 / 3;
          }
          *pp_src = in;
          return dst;
      }

      /* filled is zero here if a full 8-symbol group was decoded last */
      *pp_src = in - filled;
      return dst;
  }
  188. #endif
  189. /*
  190. * Decode base64 encoded stream.
  191. * Can stop on EOF, specified char, or on uuencode-style "====" line:
  192. * flags argument controls it.
  193. */
  194. void FAST_FUNC read_base64(FILE *src_stream, FILE *dst_stream, int flags)
  195. {
  196. /* Note that EOF _can_ be passed as exit_char too */
  197. #define exit_char ((int)(signed char)flags)
  198. #define uu_style_end (flags & BASE64_FLAG_UU_STOP)
  199. #define base32 (flags & BASE64_32)
  200. /* uuencoded files have 61 byte lines.
  201. * base32/64 have 76 byte lines by default.
  202. * Use 80 byte buffer to process one line at a time.
  203. */
  204. enum { BUFFER_SIZE = 80 };
  205. /* decoded data is shorter than input, can use single buffer for both */
  206. char buf[BUFFER_SIZE + 2];
  207. int term_seen = 0;
  208. int in_count = 0;
  209. while (1) {
  210. char *out_tail;
  211. const char *in_tail;
  212. while (in_count < BUFFER_SIZE) {
  213. int ch = fgetc(src_stream);
  214. if (ch == exit_char) {
  215. if (in_count == 0)
  216. return;
  217. term_seen = 1;
  218. break;
  219. }
  220. if (ch == EOF) {
  221. term_seen = 1;
  222. break;
  223. }
  224. /* Prevent "====" line to be split: stop if we see '\n'.
  225. * We can also skip other whitespace and skirt the problem
  226. * of files with NULs by stopping on any control char or space:
  227. */
  228. if (ch <= ' ')
  229. break;
  230. buf[in_count++] = ch;
  231. }
  232. buf[in_count] = '\0';
  233. /* Did we encounter "====" line? */
  234. if (uu_style_end && strcmp(buf, "====") == 0)
  235. return;
  236. in_tail = buf;
  237. #if ENABLE_BASE32
  238. if (base32)
  239. out_tail = decode_base32(buf, &in_tail);
  240. else
  241. #endif
  242. out_tail = decode_base64(buf, &in_tail);
  243. fwrite(buf, (out_tail - buf), 1, dst_stream);
  244. if (term_seen) {
  245. /* Did we consume ALL characters? */
  246. if (*in_tail == '\0')
  247. return;
  248. /* No */
  249. bb_simple_error_msg_and_die("truncated input");
  250. }
  251. /* It was partial decode */
  252. in_count = strlen(in_tail);
  253. memmove(buf, in_tail, in_count);
  254. }
  255. }