hexdump_xxd.c 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  /* vi: set sw=4 ts=4: */
  /*
   * xxd implementation for busybox
   *
   * Copyright (c) 2017 Denys Vlasenko <vda.linux@gmail.com>
   *
   * Licensed under GPLv2, see file LICENSE in this source tree.
   */
  9. //config:config XXD
  10. //config: bool "xxd (11 kb)"
  11. //config: default y
  12. //config: help
  13. //config: The xxd utility is used to display binary data in a readable
  14. //config: way that is comparable to the output from most hex editors.
  15. //applet:IF_XXD(APPLET_NOEXEC(xxd, xxd, BB_DIR_USR_BIN, BB_SUID_DROP, xxd))
  16. //kbuild:lib-$(CONFIG_XXD) += hexdump_xxd.o
  // $ xxd --version
  // xxd V1.10 27oct98 by Juergen Weigert
  // $ xxd --help
  // Usage:
  //       xxd [options] [infile [outfile]]
  //    or
  //       xxd -r [-s [-]offset] [-c cols] [-ps] [infile [outfile]]
  // Options:
  //    -a             toggle autoskip: A single '*' replaces nul-lines. Default off.
  //    -b             binary digit dump (incompatible with -ps,-i,-r). Default hex.
  //    -c cols        format <cols> octets per line. Default 16 (-i: 12, -ps: 30).
  //    -E             show characters in EBCDIC. Default ASCII.
  //    -e             little-endian dump (incompatible with -ps,-i,-r).
  //    -g             number of octets per group in normal output. Default 2 (-e: 4).
  //    -i             output in C include file style.
  //    -l len         stop after <len> octets.
  //    -o off         add <off> to the displayed file position.
  //    -ps            output in postscript plain hexdump style.
  //    -r             reverse operation: convert (or patch) hexdump into binary.
  //    -r -s off      revert with <off> added to file positions found in hexdump.
  //    -s [+][-]seek  start at <seek> bytes abs. (or +: rel.) infile offset.
  //    -u             use upper case hex letters.
  39. //usage:#define xxd_trivial_usage
  40. //usage: "[-pri] [-g N] [-c N] [-l LEN] [-s OFS] [-o OFS] [FILE]"
  41. //usage:#define xxd_full_usage "\n\n"
  42. //usage: "Hex dump FILE (or stdin)\n"
  43. //usage: "\n -g N Bytes per group"
  44. //usage: "\n -c N Bytes per line"
  45. //usage: "\n -p Show only hex bytes, assumes -c30"
  46. //usage: "\n -i C include file style"
  47. // exactly the same help text lines in hexdump and xxd:
  48. //usage: "\n -l LENGTH Show only first LENGTH bytes"
  49. //usage: "\n -s OFFSET Skip OFFSET bytes"
  50. //usage: "\n -o OFFSET Add OFFSET to displayed offset"
  51. //usage: "\n -r Reverse (with -p, assumes no offsets in input)"
  52. #include "libbb.h"
  53. #include "common_bufsiz.h"
  54. #include "dump.h"
  /* This is a NOEXEC applet. Be very careful! */

  /*
   * Option bits as returned by getopt32(). The bit positions follow the
   * order of the letters in the option string "l:s:apirg:+c:+o:".
   */
  enum {
      OPT_l = 1 << 0,
      OPT_s = 1 << 1,
      OPT_a = 1 << 2,
      OPT_p = 1 << 3,
      OPT_i = 1 << 4,
      OPT_r = 1 << 5,
      OPT_g = 1 << 6,
      OPT_c = 1 << 7,
      OPT_o = 1 << 8,
  };

  /* The common scratch buffer; "xxd -r" zero-fills it and uses it to pad gaps */
  #define fillbuf bb_common_bufsiz1
  66. static void write_zeros(off_t count)
  67. {
  68. errno = 0;
  69. do {
  70. unsigned sz = count < COMMON_BUFSIZE ? (unsigned)count : COMMON_BUFSIZE;
  71. if (fwrite(fillbuf, 1, sz, stdout) != sz)
  72. bb_simple_perror_msg_and_die("write error");
  73. count -= sz;
  74. } while (count != 0);
  75. }
  76. static void reverse(unsigned opt, const char *filename, char *opt_s)
  77. {
  78. FILE *fp;
  79. char *buf;
  80. off_t cur, opt_s_ofs;
  81. memset(fillbuf, 0, COMMON_BUFSIZE);
  82. opt_s_ofs = cur = 0;
  83. if (opt_s) {
  84. opt_s_ofs = BB_STRTOOFF(opt_s, NULL, 0);
  85. if (errno || opt_s_ofs < 0)
  86. bb_error_msg_and_die("invalid number '%s'", opt_s);
  87. }
  88. fp = filename ? xfopen_for_read(filename) : stdin;
  89. get_new_line:
  90. while ((buf = xmalloc_fgetline(fp)) != NULL) {
  91. char *p;
  92. p = buf;
  93. if (!(opt & OPT_p)) {
  94. char *end;
  95. off_t ofs;
  96. skip_address:
  97. p = skip_whitespace(p);
  98. ofs = BB_STRTOOFF(p, &end, 16);
  99. if ((errno && errno != EINVAL)
  100. || ofs < 0
  101. /* -s SEEK value should be added before seeking */
  102. || (ofs += opt_s_ofs) < 0
  103. ) {
  104. bb_error_msg_and_die("invalid number '%s'", p);
  105. }
  106. if (ofs != cur) {
  107. if (fseeko(stdout, ofs, SEEK_SET) != 0) {
  108. if (ofs < cur)
  109. bb_simple_perror_msg_and_die("cannot seek");
  110. write_zeros(ofs - cur);
  111. }
  112. cur = ofs;
  113. }
  114. p = end;
  115. /* NB: for xxd -r, first hex portion is address even without colon */
  116. /* But if colon is there, skip it: */
  117. if (*p == ':')
  118. p++;
  119. }
  120. /* Process hex bytes optionally separated by whitespace */
  121. for (;;) {
  122. uint8_t val, c;
  123. int badchar = 0;
  124. nibble1:
  125. if (opt & OPT_p)
  126. p = skip_whitespace(p);
  127. c = *p++;
  128. if (isdigit(c))
  129. val = c - '0';
  130. else if ((c|0x20) >= 'a' && (c|0x20) <= 'f')
  131. val = (c|0x20) - ('a' - 10);
  132. else {
  133. /* xxd V1.10 allows one non-hexnum char:
  134. * echo -e "31 !3 0a 0a" | xxd -r -p
  135. * is "10<a0>" (no <cr>) - "!" is ignored,
  136. * but stops for more than one:
  137. * echo -e "31 !!343434\n30 0a" | xxd -r -p
  138. * is "10<cr>" - "!!" drops rest of the line.
  139. * Note: this also covers whitespace chars:
  140. * xxxxxxxx: 3031 3233 3435 3637 3839 3a3b 3c3d 3e3f 0123456789:;<=>?
  141. * detects this ^ - skips this one space
  142. * xxxxxxxx: 3031 3233 3435 3637 3839 3a3b 3c3d 3e3f 0123456789:;<=>?
  143. * detects this ^^ - skips the rest
  144. */
  145. if (c == '\0' || badchar)
  146. break;
  147. badchar++;
  148. goto nibble1;
  149. }
  150. val <<= 4;
  151. nibble2:
  152. if (opt & OPT_p) {
  153. /* Works the same with xxd V1.10:
  154. * echo "31 09 32 0a" | xxd -r -p
  155. * echo "31 0 9 32 0a" | xxd -r -p
  156. * thus allow whitespace (even multiple chars)
  157. * after byte's 1st char:
  158. */
  159. p = skip_whitespace(p);
  160. }
  161. c = *p++;
  162. if (isdigit(c))
  163. val |= c - '0';
  164. else if ((c|0x20) >= 'a' && (c|0x20) <= 'f')
  165. val |= (c|0x20) - ('a' - 10);
  166. else {
  167. if (c != '\0') {
  168. /* "...3<not_hex_char>...": ignore "3",
  169. * skip everything up to next hexchar or newline:
  170. */
  171. while (!isxdigit(*p)) {
  172. if (*p == '\0') {
  173. free(buf);
  174. goto get_new_line;
  175. }
  176. p++;
  177. }
  178. goto nibble1;
  179. }
  180. /* Nibbles can join even through newline:
  181. * echo -e "31 3\n2 0a" | xxd -r -p
  182. * is "12<cr>".
  183. */
  184. free(buf);
  185. p = buf = xmalloc_fgetline(fp);
  186. if (!buf)
  187. break;
  188. if (!(opt & OPT_p)) /* -p and !-p: different behavior */
  189. goto skip_address;
  190. goto nibble2;
  191. }
  192. putchar(val);
  193. cur++;
  194. } /* for(;;) */
  195. free(buf);
  196. }
  197. //fclose(fp);
  198. fflush_stdout_and_exit_SUCCESS();
  199. }
  /*
   * Print HDR (a printf format containing one %s) followed by P rewritten
   * as a C identifier: every non-alphanumeric character of P is printed
   * as '_'. The %s in HDR receives "__" when P starts with a digit,
   * otherwise an empty string.
   */
  static void print_C_style(const char *p, const char *hdr)
  {
      const char *prefix = "";

      if (isdigit(p[0]))
          prefix = "__";
      printf(hdr, prefix);

      for (; *p != '\0'; p++) {
          if (isalnum(*p))
              bb_putchar(*p);
          else
              bb_putchar('_');
      }
  }
  208. int xxd_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
  209. int xxd_main(int argc UNUSED_PARAM, char **argv)
  210. {
  211. char buf[80];
  212. dumper_t *dumper;
  213. char *opt_l, *opt_o;
  214. char *opt_s = NULL;
  215. unsigned bytes = 2;
  216. unsigned cols = 0;
  217. unsigned opt;
  218. int r;
  219. setup_common_bufsiz();
  220. dumper = alloc_dumper();
  221. opt = getopt32(argv, "^" "l:s:apirg:+c:+o:" "\0" "?1" /* 1 argument max */,
  222. &opt_l, &opt_s, &bytes, &cols, &opt_o
  223. );
  224. argv += optind;
  225. dumper->dump_vflag = ALL;
  226. // if (opt & OPT_a)
  227. // dumper->dump_vflag = SKIPNUL; ..does not exist
  228. if (opt & OPT_l) {
  229. dumper->dump_length = xstrtou_range(
  230. opt_l,
  231. /*base:*/ 0,
  232. /*lo:*/ 0, /*hi:*/ INT_MAX
  233. );
  234. }
  235. if (opt & OPT_s) {
  236. dumper->dump_skip = xstrtoull_range(
  237. opt_s,
  238. /*base:*/ 0,
  239. /*lo:*/ 0, /*hi:*/ OFF_T_MAX
  240. );
  241. //BUGGY for /proc/version (unseekable?)
  242. }
  243. if (opt & OPT_r) {
  244. reverse(opt, argv[0], opt_s);
  245. }
  246. if (opt & OPT_o) {
  247. /* -o accepts negative numbers too */
  248. dumper->xxd_displayoff = xstrtoll(opt_o, /*base:*/ 0);
  249. }
  250. if (opt & OPT_p) {
  251. if (cols == 0)
  252. cols = 30;
  253. bytes = cols; /* -p ignores -gN */
  254. } else {
  255. if (cols == 0)
  256. cols = (opt & OPT_i) ? 12 : 16;
  257. if (opt & OPT_i) {
  258. bytes = 1; // -i ignores -gN
  259. // output is " 0xXX, 0xXX, 0xXX...", add leading space
  260. bb_dump_add(dumper, "\" \"");
  261. } else
  262. bb_dump_add(dumper, "\"%08_ax: \""); // "address: "
  263. }
  264. if (bytes < 1 || bytes >= cols) {
  265. sprintf(buf, "%u/1 \"%%02x\"", cols); // cols * "XX"
  266. bb_dump_add(dumper, buf);
  267. }
  268. else if (bytes == 1) {
  269. if (opt & OPT_i)
  270. sprintf(buf, "%u/1 \" 0x%%02x,\"", cols); // cols * " 0xXX,"
  271. //TODO: compat: omit the last comma after the very last byte
  272. else
  273. sprintf(buf, "%u/1 \"%%02x \"", cols); // cols * "XX "
  274. bb_dump_add(dumper, buf);
  275. }
  276. else {
  277. /* Format "print byte" with and without trailing space */
  278. #define BS "/1 \"%02x \""
  279. #define B "/1 \"%02x\""
  280. unsigned i;
  281. char *bigbuf = xmalloc(cols * (sizeof(BS)-1));
  282. char *p = bigbuf;
  283. for (i = 1; i <= cols; i++) {
  284. if (i == cols || i % bytes)
  285. p = stpcpy(p, B);
  286. else
  287. p = stpcpy(p, BS);
  288. }
  289. // for -g3, this results in B B BS B B BS... B = "xxxxxx xxxxxx .....xx"
  290. // todo: can be more clever and use
  291. // one 'bytes-1/1 "%02x"' format instead of many "B B B..." formats
  292. //bb_error_msg("ADDED:'%s'", bigbuf);
  293. bb_dump_add(dumper, bigbuf);
  294. free(bigbuf);
  295. }
  296. if (!(opt & (OPT_p|OPT_i))) {
  297. sprintf(buf, "\" \"%u/1 \"%%_p\"\"\n\"", cols); // " ASCII\n"
  298. bb_dump_add(dumper, buf);
  299. } else {
  300. bb_dump_add(dumper, "\"\n\"");
  301. dumper->xxd_eofstring = "\n";
  302. }
  303. if ((opt & OPT_i) && argv[0]) {
  304. print_C_style(argv[0], "unsigned char %s");
  305. printf("[] = {\n");
  306. }
  307. r = bb_dump_dump(dumper, argv);
  308. if (r == 0 && (opt & OPT_i) && argv[0]) {
  309. print_C_style(argv[0], "};\nunsigned int %s");
  310. printf("_len = %"OFF_FMT"u;\n", dumper->address);
  311. }
  312. return r;
  313. }