keyfinder.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. /*
  2. * Keyfinder - finds crypto keys, encrypted data and compressed data in files
  3. * by analyzing the entropy of parts of the file.
  4. *
  5. * (c) 2005 by van Hauser / THC <vh@thc.org> www.thc.org
  6. * The GPL 2.0 applies to this code.
  7. *
  8. * Based on the paper "Playing hide and seek with stored keys" by Shamir and
  9. * van Someren. www.ncipher.com/products/files/papers/anguilla/keyhide2.pdf
  10. *
  11. * In my experiments I went however a different route to identify keys which
  12. * seems to be better when identifying keys.
  13. * The paper evaluates 60 byte chunks on their entropy, and depending on the
  14. * number of consecutive chunks with high entropies, this could be the key.
  15. * This tool evalutes the full key size for the entropy, increasing by an
  16. * approx 10% of the key size windows. Hence if the key is 1024 bit = 128 byte
  17. * long, the window size is 10, and the file size is 500 bytes, it looks at
  18. * the randomness from bytes 0-128, then 10-138, next 20-148 etc.
  19. * Additionally to measuring the entropy, I added checking for the
  20. * arithmetical mean, and detecting couting bytes up- and downwards in the
  21. * beginning, middle or end of the file.
  22. * By having three randomness checks and evaluating the full key size with a
  23. * sliding window, the best keyfinding measures are in place, and much better
  24. * than in the described paper.
  25. *
  26. * However still beware: you will 1) receive some false positives, and 2)
  27. * Keyfinder can not find the exact start/end region of the key, it will
  28. * usually be some bytes before or after the reported file area.
  29. *
  30. * For usage hints, type "keyfinder -h"
  31. *
  32. * To compile: gcc -o keyfinder keyfinder.c -lm
  33. *
  34. */
  35. #include <stdio.h>
  36. #include <sys/types.h>
  37. #include <sys/stat.h>
  38. #include <fcntl.h>
  39. #include <unistd.h>
  40. #include <stdlib.h>
  41. #include <string.h>
  42. #include <ctype.h>
  43. #include <math.h>
  44. #define MINIMUM_RANDOMNESS 85
  45. #define KEY_SIZE 128
  46. #define WINDOW_SIZE 10
  47. #define DUMP_ROWS 16
  48. int minimal_randomness = MINIMUM_RANDOMNESS;
  49. char *prg;
  50. int ext_entropy;
  51. int ext_mean;
  52. int debug = 0;
  53. void help() {
  54. printf("Keyfinder v1.0 (c) 2005 by van Hauser / THC <vh@thc.org> www.thc.org\n");
  55. printf("\nSyntax: %s [-k KEY_SIZE] [-w WINDOW_SIZE] [-r MINIMUM_RANDOMNESS] FILE\n", prg);
  56. printf("\nOptions:\n");
  57. printf(" -k KEY_SIZE Key size to look for (default: %d byte [%d bit])\n", KEY_SIZE, KEY_SIZE * 8);
  58. printf(" -w WINDOW_SIZE Window size to check (default: %d byte)\n", WINDOW_SIZE);
  59. printf(" -r MINIMUM_RANDOMNESS Minimum %% of randomness for keys (default: %d%%)\n", MINIMUM_RANDOMNESS);
  60. printf(" -d Print debug output\n");
  61. printf("\nFinds binary crypto keys, crypto data and compressed data in files.\n");
  62. printf("The result is an indicator where the key could be, not a byte exact match.\n");
  63. printf("The randomness is calculated by the entropy, the arithmetic mean value and a\n");
  64. printf("counting check. Read more information in the header of the keyfinder.c file.\n");
  65. printf("Note: If -k is specified but not -w, -w will be 10%% of -k.\n");
  66. printf("Hints: (1) the smaller -k, the smaller should be -r\n");
  67. printf(" (2) the smaller -r the more false positives\n");
  68. printf(" (3) -w should be 1/8 to 1/20 of -k\n");
  69. printf(" (4) -k values are 128/256/512 byte for RSA/asymmetric keys\n");
  70. printf(" (5) -k 512 -> -r 95; -k 128 -> -r 85 \n");
  71. exit(-1);
  72. }
  73. /* Why is log2() in libm not working?? what a fucking #!+~$$!! */
  74. #define log2of10 3.32192809488736234787
  75. static double log2_(double x) {
  76. return (log2of10 * (log10(x)));
  77. }
  78. void calculate_randomness(unsigned char *buf, int buflen) {
  79. double ent = 0.0;
  80. double mean = 0.0;
  81. double datasum = 0.0;
  82. unsigned long ccount[256];
  83. double prob[256];
  84. int i, j = 0;
  85. for (i = 0; i < 256; i++)
  86. ccount[i] = 0;
  87. for (i = 0; i < buflen; i++)
  88. ccount[buf[i]]++;
  89. for (i = 0; i < 256; i++) {
  90. prob[i] = (double) ccount[i] / buflen;
  91. datasum += ((double) i) * ccount[i]; /**/
  92. }
  93. for (i = 0; i < 256; i++) {
  94. if (prob[i] > 0.0) {
  95. ent += prob[i] * log2_((1.0 / prob[i]));
  96. // printf("%f += %f * %f\n", ent, prob[i], log2_((1.0 / prob[i])));
  97. }
  98. }
  99. mean = datasum / buflen; /**/
  100. ext_mean = (mean - 127.5) / 1.275;
  101. if (ext_mean < 0)
  102. ext_mean = ext_mean * -1;
  103. ext_mean = 100 - ext_mean;
  104. ext_entropy = (ent * 100) / 8;
  105. if (debug) {
  106. printf("Entropy: %f bits (8 is totally random)\n", ent);
  107. printf("Mean: %1.4f (127.5 is totally random)\n", mean);
  108. }
  109. if (ext_entropy + ext_mean >= minimal_randomness) {
  110. /* check for counting in the beginning */
  111. for (i = 0; i < 8 && j == 0; i++)
  112. if (buf[i] + 1 != buf[i + 1])
  113. j = 1;
  114. if (j == 0)
  115. j = 2;
  116. if (j == 1)
  117. j = 0;
  118. for (i = 0; i < 8 && j == 0; i++)
  119. if (buf[i] - 1 != buf[i++ + 1])
  120. j = 1;
  121. if (j == 0)
  122. j = 2;
  123. if (j == 1)
  124. j = 0;
  125. /* check for counting in the middle */
  126. for (i = 0; i < 8 && j == 0; i++)
  127. if (buf[((buflen/2) - i) - 4] != buf[((buflen/2) - i) - 3] + 1)
  128. j = 1;
  129. if (j == 0)
  130. j = 2;
  131. if (j == 1)
  132. j = 0;
  133. for (i = 0; i < 8 && j == 0; i++)
  134. if (buf[((buflen/2) - i) - 4] != buf[((buflen/2) - i) - 3] - 1)
  135. j = 1;
  136. if (j == 0)
  137. j = 2;
  138. if (j == 1)
  139. j = 0;
  140. /* check for counting in the end */
  141. for (i = 1; i <= 8 && j == 0; i++)
  142. if (buf[buflen - i] != buf[(buflen - i) - 1] + 1)
  143. j = 1;
  144. if (j == 0)
  145. j = 2;
  146. if (j == 1)
  147. j = 0;
  148. for (i = 1; i <= 8 && j == 0; i++)
  149. if (buf[buflen - i] != buf[(buflen - i) - 1] - 1)
  150. j = 1;
  151. if (j == 0)
  152. j = 2;
  153. if (j == 1)
  154. j = 0;
  155. if (j == 2) {
  156. if (debug)
  157. printf("Counting detected, false positive, ignoring...\n");
  158. ext_mean = 0;
  159. ext_entropy = 0;
  160. }
  161. }
  162. }
  163. void dump_asciihex(unsigned char *string, int length, unsigned int offset) {
  164. unsigned char *p = (unsigned char *) string;
  165. unsigned char lastrow_data[16];
  166. unsigned int rows = length / DUMP_ROWS;
  167. unsigned int lastrow = length % DUMP_ROWS;
  168. unsigned int i, j;
  169. for (i = 0; i < rows; i++) {
  170. printf("%08hx: ", i * 16 + offset);
  171. for (j = 0; j < DUMP_ROWS; j++) {
  172. printf("%02x", p[(i * 16) + j]);
  173. if (j % 2 == 1)
  174. printf(" ");
  175. }
  176. printf(" [ ");
  177. for (j = 0; j < DUMP_ROWS; j++) {
  178. if (isprint(p[(i * 16) + j]))
  179. printf("%c", p[(i * 16) + j]);
  180. else
  181. printf(".");
  182. }
  183. printf(" ]\n");
  184. }
  185. if (lastrow > 0) {
  186. memset(lastrow_data, 0, sizeof(lastrow_data));
  187. memcpy(lastrow_data, p + length - lastrow, lastrow);
  188. printf("%08hx: ", i * 16 + offset);
  189. for (j = 0; j < lastrow; j++) {
  190. printf("%02x", p[(i * 16) + j]);
  191. if (j % 2 == 1)
  192. printf(" ");
  193. }
  194. while(j < DUMP_ROWS) {
  195. printf(" ");
  196. if (j % 2 == 1)
  197. printf(" ");
  198. j++;
  199. }
  200. printf(" [ ");
  201. for (j = 0; j < lastrow; j++) {
  202. if (isprint(p[(i * 16) + j]))
  203. printf("%c", p[(i * 16) + j]);
  204. else
  205. printf(".");
  206. }
  207. while(j < DUMP_ROWS) {
  208. printf(" ");
  209. j++;
  210. }
  211. printf(" ]\n");
  212. }
  213. }
  214. void dump_found(char *buf, int key_size, unsigned int block_count, int entropy, int mean) {
  215. printf("Found at block %u (Entropy is %d%% | Mean Deviation is %d%% = %d%%):\n", block_count * 64, entropy, mean, (entropy + mean) / 2);
  216. dump_asciihex(buf, key_size, block_count * 64);
  217. printf("\n");
  218. }
  219. int main(int argc, char *argv[]) {
  220. int key_size = KEY_SIZE;
  221. int window_size = 0;
  222. char *fn;
  223. FILE *f;
  224. char *buf;
  225. int i;
  226. int reading;
  227. unsigned int block_count = 0;
  228. prg = argv[0];
  229. if (argc < 2 || strcmp(argv[1], "-h") == 0 || strncmp(argv[1], "--h", 3) == 0)
  230. help();
  231. while ((i = getopt(argc, argv, "dw:r:k:")) >= 0) {
  232. switch(i) {
  233. case 'd':
  234. debug = 1;
  235. break;
  236. case 'w':
  237. window_size = atoi(optarg);
  238. break;
  239. case 'r':
  240. minimal_randomness = atoi(optarg);
  241. break;
  242. case 'k':
  243. key_size = atoi(optarg);
  244. break;
  245. default:
  246. help();
  247. }
  248. }
  249. if (key_size != KEY_SIZE) {
  250. if (window_size == 0)
  251. window_size = (key_size / 10) - 1;
  252. } else
  253. window_size = WINDOW_SIZE;
  254. if (key_size < 20 || key_size > 65535 || window_size < 1 || window_size >= key_size || minimal_randomness < 1 || minimal_randomness > 99) {
  255. fprintf(stderr, "Error: Wrong Values! Limits: 20 < key_size < 65535; 1 < window_size < key_size; 1 < minimal_randomness < 100\n");
  256. exit(-1);
  257. }
  258. if (key_size < window_size * 8)
  259. fprintf(stderr, "Warning: The window size is too large, -w should be 1/8 to 1/16 of -k\n");
  260. if (optind + 1 != argc)
  261. help();
  262. fn = argv[argc - 1];
  263. if ((f = fopen(fn, "r")) == NULL) {
  264. fprintf(stderr, "Error: Can not open file %s\n", fn);
  265. exit(-1);
  266. }
  267. if ((buf = malloc(key_size + window_size)) == NULL) {
  268. fprintf(stderr, "Error: malloc() failed\n");
  269. exit(-1);
  270. }
  271. memset(buf, 0, key_size + window_size);
  272. printf("Analyzing %s:\n", fn);
  273. // if (debug)
  274. printf("[Key Size: %d byte/%d bit, Window Size: %d byte, Minimal Randomness: %d%%]\n", key_size, key_size * 8, window_size, minimal_randomness);
  275. minimal_randomness = minimal_randomness * 2;
  276. if ((reading = fread(buf, 1, key_size, f)) > 0) {
  277. calculate_randomness(buf, reading);
  278. if ((ext_entropy + ext_mean) >= minimal_randomness && reading == key_size)
  279. dump_found(buf, key_size, block_count, ext_entropy, ext_mean);
  280. if (reading == key_size)
  281. reading = window_size;
  282. while (!feof(f) && reading == window_size) {
  283. if ((reading = fread(buf + key_size, 1, window_size, f)) > 0) {
  284. ++block_count;
  285. memmove(buf, buf + reading, key_size);
  286. calculate_randomness(buf, key_size);
  287. if ((ext_entropy + ext_mean) >= minimal_randomness)
  288. dump_found(buf, key_size, block_count, ext_entropy, ext_mean);
  289. }
  290. }
  291. }
  292. return 0;
  293. }